# 設定全局 Chunk 選項:隱藏警告與訊息,保持報告乾淨
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.width = 10, fig.height = 6)
# === 載入全域必要套件 ===
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
## Warning: package 'stringr' was built under R version 4.5.2
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(survival)
library(survminer)
## Warning: package 'survminer' was built under R version 4.5.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.5.2
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.5.2
library(flextable)
## Warning: package 'flextable' was built under R version 4.5.2
##
## Attaching package: 'flextable'
## The following object is masked from 'package:gtsummary':
##
## continuous_summary
## The following objects are masked from 'package:ggpubr':
##
## border, font, rotate
library(officer)
## Warning: package 'officer' was built under R version 4.5.2
library(ggplot2)
library(broom)
## Warning: package 'broom' was built under R version 4.5.2
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(grid) # 💡 新增這行!
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.5.2
library(forcats)
## Warning: package 'forcats' was built under R version 4.5.2
# === 全域參數設定 ===
stages <- c("1A","1B","2A","2B","3A","3B","3C")
censor_years <- 5
此區塊進行日期轉換、病歷 ID 去重複(依據側性、診斷時間等規則)、篩選 HER2 陽性族群,並動態定義最終期別(有做 NAC 看臨床分期,未就做看病理分期)。
精準處理 RFS 與 OS 事件,並將存活時間強制截尾於 5 年(60個月),以利後續 KM 與 Cox 分析。保留 1332 人母體,未曾 Disease-free 者(Recur_Type=70)於 RFS 設為 NA。
## 📄 原始資料筆數: 10319
## ✅ ID 數量分類:
## 一筆: 9226
## 二筆: 539
## 三筆以上: 5
## ✅ 整合完成!最終資料筆數: 10304
## 👉 Step 1 (HER2+ 總人數): 1757
## 👉 Step 2 (有開刀): 1652
## 👉 Step 3 (未做 NAC): 1333
## 👉 Step 6 (pT1N0M0 極早期小腫瘤總數): 366
## 👉 Step 7 (全體 HER2+ 且有明確期別的分析人數): 1332
## 🎉 資料清理完成!已產出 'final_her2_data.csv'。
依照 4 種治療組合 (Group A~D) 呈現人口學與臨床病理特徵。
# ==========================================
# 治療設定與製表 (Table 1) - ✅ 1332人完美版
# ==========================================
library(dplyr)
library(gtsummary)
library(flextable)
library(officer)
# === 1. 讀取 1,332 人的完整資料 ===
# 請確保 final_her2_data.csv 已經是 1332 筆的版本
df_all <- read.csv("final_her2_data.csv")
# === 2. 變數轉換與整理 ===
df_analysis <- df_all %>%
mutate(
# --- 治療分組 ---
group = case_when(
CT == 0 & TT == 0 ~ "Group A",
CT == 0 & TT == 1 ~ "Group B",
CT == 1 & TT == 0 ~ "Group C",
CT == 1 & TT == 1 ~ "Group D",
TRUE ~ NA_character_
),
group = factor(group, levels = c("Group A", "Group B", "Group C", "Group D")),
# --- 臨床特徵轉換 ---
age_group = ifelse(Age < 50, "< 50 y/o", "≥ 50 y/o"),
hr_status = case_when(
grepl("HR\\+", ihc) ~ "HR positive",
grepl("HR\\-", ihc) ~ "HR negative",
TRUE ~ NA_character_
),
hr_status = factor(hr_status, levels = c("HR negative", "HR positive")),
grade_f = factor(Grade, levels = c(1, 2, 3), labels = c("I", "II", "III")),
histology = case_when(
Hist == 8500 ~ "IDC",
Hist == 8520 ~ "ILC",
TRUE ~ "Others"
),
histology = factor(histology, levels = c("IDC", "ILC", "Others")),
endocrine = factor(ifelse(HT == 1, "Yes", "No"), levels = c("No", "Yes")),
# --- 放射線治療 (修復 100% bug,使用正確欄位) ---
rt_done = ifelse(!is.na(X4.2.1.1.RT.Target.Summary) & as.numeric(X4.2.1.1.RT.Target.Summary) > 0, "Done", "None"),
rt_done = factor(rt_done, levels = c("None", "Done")),
# --- 追蹤月數 (使用原始未截尾的 surv_year_os,呈現真實追蹤時間) ---
follow_up_months = surv_year_os * 12
)
# === 3. 使用 gtsummary 製作 Table 1 ===
tbl1 <- df_analysis %>%
select(
# 這裡放進去的變數,都會出現在 Table 1 裡面!
# final_stage 就是你要的 patient counts by stage
group, Age, age_group, hr_status, grade_f, histology,
endocrine, rt_done, follow_up_months, final_stage
) %>%
tbl_summary(
by = group,
type = list(
Age ~ "continuous2",
follow_up_months ~ "continuous2"
),
statistic = list(
all_continuous2() ~ "{mean} ± {sd}; {median} [{min}, {max}]",
all_categorical() ~ "{n} ({p}%)"
),
missing = "no",
label = list(
Age ~ "Age",
age_group ~ "Age group",
hr_status ~ "Hormone receptor status",
grade_f ~ "Tumor grade",
histology ~ "Histology",
endocrine ~ "Endocrine therapy",
rt_done ~ "Radiotherapy",
follow_up_months ~ "Follow-up (months)",
final_stage ~ "Final stage" # <--- 這裡會顯示各期別與各治療組的交叉人數
)
) %>%
add_overall(last = TRUE) %>%
add_p(test = everything() ~ "chisq.test") %>%
modify_header(
label = "**Characteristics**",
# gtsummary 會自動抓取 1332 人並計算各組 N 數
all_stat_cols() ~ "**{level}**\n(n={n})"
) %>%
bold_labels()
# === 4. 輸出結果 ===
# 在 R Markdown 中顯示表格
tbl1
| Characteristics | Group A (n=240)1 | Group B (n=30)1 | Group C (n=70)1 | Group D (n=992)1 | Overall (n=1332)1 | p-value2 |
|---|---|---|---|---|---|---|
| Age | <0.001 | |||||
| Mean ± SD; Median [Min, Max] | 58 ± 12; 58 [32, 89] | 64 ± 16; 68 [32, 86] | 55 ± 12; 57 [30, 79] | 54 ± 11; 54 [20, 87] | 55 ± 12; 55 [20, 89] | |
| Age group | 0.008 | |||||
| < 50 y/o | 57 (24%) | 7 (23%) | 22 (31%) | 344 (35%) | 430 (32%) | |
| ≥ 50 y/o | 183 (76%) | 23 (77%) | 48 (69%) | 648 (65%) | 902 (68%) | |
| Hormone receptor status | <0.001 | |||||
| HR negative | 148 (62%) | 9 (30%) | 23 (33%) | 428 (43%) | 608 (46%) | |
| HR positive | 92 (38%) | 21 (70%) | 47 (67%) | 564 (57%) | 724 (54%) | |
| Tumor grade | <0.001 | |||||
| I | 7 (5.5%) | 0 (0%) | 0 (0%) | 4 (0.7%) | 11 (1.5%) | |
| II | 76 (60%) | 7 (44%) | 27 (53%) | 270 (49%) | 380 (51%) | |
| III | 44 (35%) | 9 (56%) | 24 (47%) | 276 (50%) | 353 (47%) | |
| Histology | 0.070 | |||||
| IDC | 224 (93%) | 27 (90%) | 67 (96%) | 946 (95%) | 1,264 (95%) | |
| ILC | 2 (0.8%) | 2 (6.7%) | 0 (0%) | 12 (1.2%) | 16 (1.2%) | |
| Others | 14 (5.8%) | 1 (3.3%) | 3 (4.3%) | 34 (3.4%) | 52 (3.9%) | |
| Endocrine therapy | 89 (37%) | 21 (70%) | 43 (61%) | 536 (54%) | 689 (52%) | <0.001 |
| Radiotherapy | <0.001 | |||||
| None | 168 (70%) | 22 (73%) | 49 (70%) | 489 (49%) | 728 (55%) | |
| Done | 72 (30%) | 8 (27%) | 21 (30%) | 503 (51%) | 604 (45%) | |
| Follow-up (months) | 0.10 | |||||
| Mean ± SD; Median [Min, Max] | 66 ± 41; 60 [0, 155] | 65 ± 39; 58 [9, 147] | 77 ± 42; 78 [5, 156] | 75 ± 39; 71 [0, 163] | 73 ± 40; 67 [0, 163] | |
| Final stage | <0.001 | |||||
| 1A | 191 (80%) | 12 (40%) | 30 (43%) | 251 (25%) | 484 (36%) | |
| 1B | 0 (0%) | 0 (0%) | 0 (0%) | 7 (0.7%) | 7 (0.5%) | |
| 2A | 35 (15%) | 5 (17%) | 28 (40%) | 287 (29%) | 355 (27%) | |
| 2B | 6 (2.5%) | 6 (20%) | 3 (4.3%) | 175 (18%) | 190 (14%) | |
| 3A | 2 (0.8%) | 2 (6.7%) | 4 (5.7%) | 130 (13%) | 138 (10%) | |
| 3B | 4 (1.7%) | 0 (0%) | 0 (0%) | 17 (1.7%) | 21 (1.6%) | |
| 3C | 2 (0.8%) | 5 (17%) | 5 (7.1%) | 125 (13%) | 137 (10%) | |
| 1 n (%) | ||||||
| 2 Pearson’s Chi-squared test | ||||||
# 匯出成 Word 檔
tbl1 %>%
as_flex_table() %>%
autofit() %>%
save_as_docx(path = "Table1_Clinicopathologic_Characteristics.docx")
cat("✅ Table 1 已成功產出並存為 Table1_Clinicopathologic_Characteristics.docx")
## ✅ Table 1 已成功產出並存為 Table1_Clinicopathologic_Characteristics.docx
# ==========================================
# 存活分析:KM Plot (1A~3C全期別展示) + Risk Table + 檢定結果
# ==========================================
library(survival)
library(survminer)
library(dplyr)
library(gridExtra)
library(ggpubr) # 💡 新增:用來畫底下的統計檢定表格
library(broom)
# 確保資料格式與 Reference 組別設定正確
df_analysis <- df_analysis %>%
filter(!is.na(group)) %>%
mutate(
group = relevel(factor(group), ref = "Group A"),
final_stage = relevel(factor(final_stage), ref = "1A")
)
# 自訂經典四色 (Group A:紅, B:藍, C:綠, D:紫)
km_colors <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3")
for (stg in stages) {
df_stage <- df_analysis %>% filter(final_stage == stg)
if (nrow(df_stage) > 0 && n_distinct(df_stage$group) >= 2) {
# ==========================================
# [RFS] 無復發存活率
# ==========================================
fit_rfs <- survfit(Surv(surv_year_rfs_5y, relapse_5y) ~ group, data = df_stage)
p_rfs <- ggsurvplot(
fit_rfs, data = df_stage,
pval = TRUE,
risk.table = TRUE,
xlim = c(0, 5),
break.time.by = 1,
palette = km_colors,
title = paste0("5-Year RFS - Stage ", stg),
xlab = "Time (Years)", ylab = "RFS Probability",
legend.title = "Treatment",
ggtheme = theme_minimal(base_size = 14),
risk.table.height = 0.25,
risk.table.y.text = TRUE, # ✅ 恢復文字標註 (Group A, B, C, D)
risk.table.y.text.col = TRUE, # ✅ 讓文字保持對應顏色,清楚又好看
fontsize = 4.5,
tables.theme = theme_cleantable()
)
# RFS 底下的檢定表格 (Cox HR vs Group A)
cox_rfs <- tryCatch(coxph(Surv(surv_year_rfs_5y, relapse_5y) ~ group, data = df_stage), error = function(e) NULL)
if (!is.null(cox_rfs)) {
cox_tbl_rfs <- tidy(cox_rfs, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
Treatment = sub("group", "", term),
`HR (95% CI)` = sprintf("%.2f (%.2f - %.2f)", estimate, conf.low, conf.high),
`P-value` = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
) %>% select(Treatment, `HR (95% CI)`, `P-value`)
p_rfs_table <- ggtexttable(cox_tbl_rfs, rows = NULL, theme = ttheme("minimal", base_size = 12))
} else {
p_rfs_table <- ggparagraph(text = "Not estimable", size = 12)
}
# ==========================================
# [OS] 整體存活率
# ==========================================
fit_os <- survfit(Surv(surv_year_os_5y, Vita_m_5y) ~ group, data = df_stage)
p_os <- ggsurvplot(
fit_os, data = df_stage,
pval = TRUE,
risk.table = TRUE,
xlim = c(0, 5),
break.time.by = 1,
palette = km_colors,
title = paste0("5-Year OS - Stage ", stg),
xlab = "Time (Years)", ylab = "OS Probability",
legend.title = "Treatment",
ggtheme = theme_minimal(base_size = 14),
risk.table.height = 0.25,
risk.table.y.text = TRUE, # ✅ 恢復文字標註
risk.table.y.text.col = TRUE,
fontsize = 4.5,
tables.theme = theme_cleantable()
)
# OS 底下的檢定表格 (Cox HR vs Group A)
cox_os <- tryCatch(coxph(Surv(surv_year_os_5y, Vita_m_5y) ~ group, data = df_stage), error = function(e) NULL)
if (!is.null(cox_os)) {
cox_tbl_os <- tidy(cox_os, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
Treatment = sub("group", "", term),
`HR (95% CI)` = sprintf("%.2f (%.2f - %.2f)", estimate, conf.low, conf.high),
`P-value` = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
) %>% select(Treatment, `HR (95% CI)`, `P-value`)
p_os_table <- ggtexttable(cox_tbl_os, rows = NULL, theme = ttheme("minimal", base_size = 12))
} else {
p_os_table <- ggparagraph(text = "Not estimable", size = 12)
}
# ==========================================
# 組合輸出 (KM 圖 + Risk Table + Cox 檢定表)
# ==========================================
grid.newpage()
grid.arrange(
p_rfs$plot, p_os$plot,
p_rfs$table, p_os$table,
p_rfs_table, p_os_table,
ncol = 2,
heights = c(4, 1.2, 1) # 高度比例:KM 圖最大,Risk Table 其次,檢定表在最下面
)
}
}
評估治療組別、年齡、HR狀態、腫瘤分化與期別對 RFS 與 OS 的多變項影響。
library(survival)
library(dplyr)
library(broom)
library(flextable)
library(officer)
# === 定義生存物件 (改用我們整理好的、最乾淨完整的 df_analysis) ===
# OS (使用完整的追蹤年份與事件)
surv_os <- Surv(time = df_analysis$surv_year_os, event = df_analysis$Vita_m == 1)
# RFS (使用完整的追蹤年份與事件)
surv_rfs <- Surv(time = df_analysis$surv_year_rfs, event = df_analysis$rfs_event_raw == 1)
# === 要跑的變數 ===
vars <- c("group", "Age", "hr_status", "grade_f", "final_stage")
# --- function: 單因子 + 多因子 Cox ---
run_cox <- function(surv_obj, data, vars) {
# 單因子 Cox
uni_results <- lapply(vars, function(v) {
f <- as.formula(paste("surv_obj ~", v))
coxph(f, data = data)
})
uni_table <- lapply(uni_results, tidy, exponentiate = TRUE, conf.int = TRUE) %>%
bind_rows(.id = "Variable") %>%
mutate(Variable = vars[as.numeric(Variable)]) %>%
select(Variable, term, estimate, conf.low, conf.high, p.value)
# 多因子 Cox
multi_model <- coxph(surv_obj ~ group + Age + hr_status + grade_f + final_stage,
data = data)
multi_table <- tidy(multi_model, exponentiate = TRUE, conf.int = TRUE) %>%
select(term, estimate, conf.low, conf.high, p.value)
# 合併單因子與多因子
cox_table <- uni_table %>%
rename(Uni_HR = estimate, Uni_LCL = conf.low, Uni_UCL = conf.high, Uni_p = p.value) %>%
left_join(
multi_table %>%
rename(Multi_HR = estimate, Multi_LCL = conf.low, Multi_UCL = conf.high, Multi_p = p.value),
by = c("term")
)
# ✨ 智慧攔截:處理 Inf 或因為 0 事件導致的荒謬超大數值 (>100) → 轉為 NA
cox_table <- cox_table %>%
mutate(across(c(Uni_HR, Uni_LCL, Uni_UCL, Multi_HR, Multi_LCL, Multi_UCL),
~ ifelse(. > 100 | is.infinite(.), NA, .)))
return(cox_table)
}
# === 格式化成表格(乾淨 Level + 顯示 ref group) ===
format_cox <- function(cox_table, title) {
cox_table_fmt <- cox_table %>%
mutate(
# 乾淨的 Level 名稱
Level = case_when(
grepl("group", term) ~ sub("group", "", term),
grepl("hr_status", term) ~ sub("hr_status", "", term),
grepl("grade_f", term) ~ sub("grade_f", "", term),
grepl("final_stage", term) ~ sub("final_stage", "", term),
term == "Age" ~ "Age (per year increase)",
TRUE ~ term
),
# 數值格式化 (遇到 NA 就顯示 Not estimable)
Uni = ifelse(is.na(Uni_HR), "Not estimable",
sprintf("%.2f (%.2f–%.2f)", Uni_HR, Uni_LCL, Uni_UCL)),
Multi = ifelse(is.na(Multi_HR), "Not estimable",
sprintf("%.2f (%.2f–%.2f)", Multi_HR, Multi_LCL, Multi_UCL)),
Uni_p = ifelse(is.na(Uni_p), "", sprintf("%.3f", Uni_p)),
Multi_p = ifelse(is.na(Multi_p), "", sprintf("%.3f", Multi_p))
) %>%
select(Variable, Level, Uni, Uni_p, Multi, Multi_p)
# 各變數的 reference group
ref_rows <- tribble(
~Variable, ~Level, ~Uni, ~Uni_p, ~Multi, ~Multi_p,
"group", "Group A (ref)", "", "", "", "",
"hr_status", "HR negative (ref)", "", "", "", "",
"grade_f", "Grade I (ref)", "", "", "", "",
"final_stage", "Stage 1A (ref)", "", "", "", ""
)
# 插入 ref 並讓它排在各變數最前面
cox_table_fmt <- bind_rows(cox_table_fmt, ref_rows) %>%
mutate(Level = trimws(Level)) %>%
arrange(factor(Variable, levels = c("group","Age","hr_status","grade_f","final_stage")),
case_when(
grepl("ref", Level) ~ 0, # ref 永遠排最上面
TRUE ~ 1
),
Level)
flextable(cox_table_fmt) %>%
set_header_labels(
Variable = "Variable",
Level = "Level",
Uni = "Univariate HR (95% CI)",
Uni_p = "Univariate p",
Multi = "Multivariate HR (95% CI)",
Multi_p = "Multivariate p"
) %>%
add_header_row(values = title, colwidths = 6) %>%
autofit() %>%
bold(i = ~ as.numeric(Uni_p) < 0.05 | as.numeric(Multi_p) < 0.05, bold = TRUE)
}
# === 跑 OS & RFS ===
cox_os <- run_cox(surv_os, df_analysis, vars)
cox_rfs <- run_cox(surv_rfs, df_analysis, vars)
# === 轉換成表格 (直接在 RMarkdown 顯示) ===
tbl_os <- format_cox(cox_os, "Table 3a. Cox Regression for Overall Survival (OS)")
tbl_rfs <- format_cox(cox_rfs, "Table 3b. Cox Regression for Recurrence-Free Survival (RFS)")
tbl_os
Table 3a. Cox Regression for Overall Survival (OS) | |||||
|---|---|---|---|---|---|
Variable | Level | Univariate HR (95% CI) | Univariate p | Multivariate HR (95% CI) | Multivariate p |
group | Group A (ref) | ||||
group | Group B | 2.33 (0.94–5.76) | 0.068 | 0.96 (0.35–2.58) | 0.928 |
group | Group C | 1.29 (0.59–2.81) | 0.524 | 0.87 (0.34–2.18) | 0.760 |
group | Group D | 0.59 (0.36–0.97) | 0.039 | 0.38 (0.20–0.73) | 0.003 |
Age | Age (per year increase) | 1.07 (1.05–1.09) | 0.000 | 1.05 (1.03–1.07) | 0.000 |
hr_status | HR negative (ref) | ||||
hr_status | HR positive | 0.81 (0.54–1.22) | 0.320 | 0.73 (0.47–1.16) | 0.181 |
grade_f | Grade I (ref) | ||||
grade_f | II | 0.49 (0.12–2.02) | 0.322 | 0.18 (0.04–0.85) | 0.030 |
grade_f | III | 0.53 (0.13–2.19) | 0.382 | 0.12 (0.02–0.58) | 0.009 |
final_stage | Stage 1A (ref) | ||||
final_stage | 1B | 0.00 (0.00–NA) | 0.994 | 0.00 (0.00–NA) | 0.995 |
final_stage | 2A | 2.53 (1.26–5.09) | 0.009 | 3.49 (1.55–7.83) | 0.003 |
final_stage | 2B | 3.75 (1.81–7.78) | 0.000 | 7.41 (3.03–18.12) | 0.000 |
final_stage | 3A | 3.72 (1.70–8.16) | 0.001 | 7.71 (3.06–19.44) | 0.000 |
final_stage | 3B | 9.27 (2.99–28.75) | 0.000 | 7.66 (2.24–26.13) | 0.001 |
final_stage | 3C | 7.23 (3.62–14.46) | 0.000 | 9.59 (3.97–23.13) | 0.000 |
tbl_rfs
Table 3b. Cox Regression for Recurrence-Free Survival (RFS) | |||||
|---|---|---|---|---|---|
Variable | Level | Univariate HR (95% CI) | Univariate p | Multivariate HR (95% CI) | Multivariate p |
group | Group A (ref) | ||||
group | Group B | 2.09 (0.85–5.14) | 0.107 | 0.47 (0.12–1.74) | 0.255 |
group | Group C | 1.10 (0.51–2.37) | 0.817 | 0.48 (0.18–1.24) | 0.128 |
group | Group D | 0.66 (0.41–1.06) | 0.084 | 0.24 (0.12–0.48) | 0.000 |
Age | Age (per year increase) | 1.03 (1.01–1.04) | 0.001 | 1.01 (0.99–1.03) | 0.464 |
hr_status | HR negative (ref) | ||||
hr_status | HR positive | 0.72 (0.50–1.04) | 0.080 | 0.73 (0.46–1.16) | 0.180 |
grade_f | Grade I (ref) | ||||
grade_f | II | Not estimable | 0.996 | Not estimable | 0.995 |
grade_f | III | Not estimable | 0.996 | Not estimable | 0.995 |
final_stage | Stage 1A (ref) | ||||
final_stage | 1B | 0.00 (0.00–NA) | 0.994 | 0.00 (0.00–NA) | 0.997 |
final_stage | 2A | 2.40 (1.33–4.33) | 0.004 | 5.18 (2.31–11.63) | 0.000 |
final_stage | 2B | 1.91 (0.93–3.93) | 0.079 | 5.22 (1.93–14.16) | 0.001 |
final_stage | 3A | 2.06 (0.94–4.50) | 0.070 | 6.11 (2.24–16.69) | 0.000 |
final_stage | 3B | 7.44 (2.75–20.18) | 0.000 | 10.98 (3.23–37.25) | 0.000 |
final_stage | 3C | 7.50 (4.21–13.36) | 0.000 | 18.57 (7.71–44.74) | 0.000 |
觀察 2007-2022 年間,不同期別與 HR 狀態下採用 NAC 的比例變化。
summary_nac <- df_analysis %>%
mutate(year = year(DIAG_DT), NAC = as.integer(NAC_binary == 1)) %>%
filter(!is.na(year), !is.na(hr_status)) %>%
group_by(year, final_stage, hr_status) %>%
summarise(total = n(), nac_rate = sum(NAC) / total, .groups = "drop") %>%
filter(final_stage %in% c("1A", "2A", "2B", "3A", "3B", "3C"))
ggplot(summary_nac, aes(x = year, y = nac_rate, color = hr_status)) +
geom_line(size = 1) + geom_point(size = 2) +
facet_wrap(~ final_stage, ncol = 3) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(title = "NAC Adoption Rate Over Time by Stage and HR Status", x = "Year of Diagnosis", y = "NAC Rate", color = "HR Status") +
theme_minimal(base_size = 14) + theme(legend.position = "bottom")