# Load data
data <- read_excel("data/patients.xlsx")
# Clean column names
data_clean <- data %>% janitor::clean_names()
# Filter out IEC and AUTO (keep only allo)
data_clean <- subset(data_clean, !(type_at_bmt %in% c("IEC", "AUTO")))
# Recode 1-year outcome
data_clean <- data_clean %>%
mutate(alive1year_recoded = ifelse(alive1year == "Yes", 1,
ifelse(alive1year == "No", 0, NA)))
# Keep only evaluable patients
data_1yr <- subset(data_clean, !is.na(alive1year_recoded))
head(data_1yr)
## # A tibble: 6 × 44
## tx_date age_group muh_number full_name date_of_birth sex
## <dttm> <chr> <chr> <chr> <dttm> <chr>
## 1 2022-01-12 00:00:00 Adult 7756425 Bellamy, J… 1949-05-14 00:00:00 Fema…
## 2 2022-01-14 00:00:00 Adult 7774482 Pitt, Brys… 1998-05-11 00:00:00 Male
## 3 2022-01-21 00:00:00 Adult 953678 Klecha, Mi… 1951-11-02 00:00:00 Fema…
## 4 2022-01-25 00:00:00 Adult 7750707 Brentin, R… 1954-04-24 00:00:00 Male
## 5 2022-01-28 00:00:00 Adult 5842909 Smith, Mat… 1959-12-11 00:00:00 Male
## 6 2022-02-23 00:00:00 Adult 7451324 Gardner, J… 1951-05-08 00:00:00 Male
## # ℹ 38 more variables: diagnosis <chr>, subdiagnosis <chr>, bmt_status <chr>,
## # type_at_bmt <chr>, sub_type_at_bmt <chr>, date_of_dx <dttm>, outpt <lgl>,
## # conditioning_at_bmt <chr>, protocol_at_bmt <chr>, status_at_tx <chr>,
## # current_status_of_disease <chr>, anc_date <dttm>, anc_comment <chr>,
## # platelet_date <dttm>, platelet_comment_50 <chr>, rfi_classification <chr>,
## # hla <chr>, acute_gvhd <lgl>, acute_gvhd_peak <dbl>, chronic_gvhd <lgl>,
## # chronic_gvhd_peak <chr>, cmv_pt <lgl>, cmv_donor <lgl>, …
ggplot(data_clean, aes(x = age_at_tx)) +
geom_histogram(binwidth = 5, fill = "orange", color = "black") +
labs(title = "Age Distribution (Allo Patients)",
x = "Age at Transplant", y = "Count")
p_outcome_age <- ggplot(data_1yr,
aes(x=factor(alive1year_recoded),
y=age_at_tx,
fill=factor(alive1year_recoded))) +
geom_boxplot() +
stat_compare_means(method="wilcox.test",
label.y=max(data_1yr$age_at_tx,na.rm=TRUE)+2) +
labs(title="Age vs Alive at 1 Year",
x="1-Year Outcome (0=No, 1=Yes)",
y="Age at Transplant")
p_outcome_age
wilcox.test(age_at_tx ~ alive1year_recoded, data=data_1yr)
##
## Wilcoxon rank sum test with continuity correction
##
## data: age_at_tx by alive1year_recoded
## W = 2114, p-value = 0.1801
## alternative hypothesis: true location shift is not equal to 0
cmi_totals <- data_1yr %>%
filter(!is.na(cmi)) %>%
group_by(cmi) %>%
summarise(total_n=n(),.groups="drop")
ggplot(cmi_totals,aes(x=factor(cmi),y=total_n))+
geom_col(fill="steelblue")+
geom_text(aes(label=paste0("n=",total_n)),vjust=-0.3)+
labs(title="Patients by CMI",x="CMI Score",y="N")
ggplot(subset(data_1yr,!is.na(cmi)),
aes(x=factor(cmi),fill=factor(alive1year_recoded)))+
geom_bar(position="fill")+
geom_text(stat="count",aes(label=paste0("n=",..count..)),
position=position_fill(vjust=0.5),color="white")+
scale_fill_manual(values=c("0"="tomato","1"="seagreen"),
labels=c("0"="No","1"="Yes"))+
labs(title="1-Year Outcome by CMI",
x="CMI",y="Proportion",fill="Alive at 1y")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
data_1yr <- data_1yr %>%
filter(!is.na(age_at_tx),!is.na(cmi)) %>%
mutate(risk_score=age_at_tx+5*cmi)
roc_score <- roc(data_1yr$alive1year_recoded ~ data_1yr$risk_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls > cases
auc(roc_score)
## Area under the curve: 0.6099
plot(roc_score,col="purple",lwd=2,main="ROC: Combined Risk Score")
abline(a=0,b=1,lty=2,col="grey")
best_cut <- coords(roc_score,"best",
ret=c("threshold","sensitivity","specificity"))
best_cut
## threshold sensitivity specificity
## 1 67.06108 0.4672897 0.8064516
# Prepare survival dataset
surv_data <- data_clean %>%
filter(!is.na(tx_date), !is.na(age_at_tx)) %>%
mutate(
tx_date = as.Date(tx_date, format = "%m/%d/%Y"),
death_date = as.Date(dod, format = "%m/%d/%Y"),
# Event times (days and months, capped at 12 months)
time_days = ifelse(!is.na(death_date),
as.numeric(difftime(death_date, tx_date, units = "days")),
365),
time_months = pmin(time_days / 30.44, 12),
# Event indicator
event = ifelse(!is.na(death_date) & time_days <= 365, 1, 0),
# Risk score
risk_score = age_at_tx + 5 * cmi,
# Risk groups
risk67 = ifelse(risk_score >= 67, "High (≥67)", "Low (<67)"),
risk80 = ifelse(risk_score >= 80, "High (≥80)", "Low (<80)")
) %>%
mutate(
# Age groups (3 bins)
age_group3 = case_when(
age_at_tx < 65 ~ "<65",
age_at_tx >= 65 & age_at_tx < 75 ~ "65–74",
age_at_tx >= 75 ~ "≥75"
),
# CMI groups (3 bins)
cmi_group3 = case_when(
cmi < 3 ~ "<3",
cmi >= 3 & cmi < 6 ~ "3–6",
cmi >= 6 ~ "≥6"
)
)
# Factor ordering
surv_data$age_group3 <- factor(surv_data$age_group3,
levels = c("<65", "65–74", "≥75"))
surv_data$cmi_group3 <- factor(surv_data$cmi_group3,
levels = c("<3", "3–6", "≥6"))
# Survival object
surv_obj <- Surv(time = surv_data$time_months, event = surv_data$event)
fit_overall <- survfit(surv_obj~1,data=surv_data)
summary(fit_overall,times=12)
## Call: survfit(formula = surv_obj ~ 1, data = surv_data)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 12 14 33 0.835 0.0262 0.785 0.888
ggsurvplot(fit_overall,conf.int=TRUE,risk.table=TRUE,
xlab="Months",ylab="Survival Probability",
title="Overall Survival (1 Year)",
break.time.by=3,xlim=c(0,12))
fit_age3 <- survfit(surv_obj ~ age_group3, data = surv_data)
summary(fit_age3, times = 12)
## Call: survfit(formula = surv_obj ~ age_group3, data = surv_data)
##
## age_group3=<65
## time n.risk n.event survival std.err lower 95% CI
## 12.0000 8.0000 14.0000 0.8772 0.0307 0.8190
## upper 95% CI
## 0.9396
##
## age_group3=65–74
## time n.risk n.event survival std.err lower 95% CI
## 12.0000 4.0000 19.0000 0.7595 0.0481 0.6709
## upper 95% CI
## 0.8598
##
## age_group3=≥75
## time n.risk n.event survival std.err lower 95% CI
## 12 2 0 1 0 1
## upper 95% CI
## 1
ggsurvplot(fit_age3,
conf.int = FALSE, pval = TRUE, risk.table = TRUE,
xlab = "Months since Transplant",
ylab = "Survival Probability",
title = "Survival by Age Groups (<65, 65–74, ≥75)",
legend.title = "Age Group",
break.time.by = 3, xlim = c(0,12))
fit_cmi3 <- survfit(surv_obj ~ cmi_group3, data = surv_data)
summary(fit_cmi3, times = 12)
## Call: survfit(formula = surv_obj ~ cmi_group3, data = surv_data)
##
## 11 observations deleted due to missingness
## cmi_group3=<3
## time n.risk n.event survival std.err lower 95% CI
## 12.0000 4.0000 15.0000 0.8718 0.0309 0.8133
## upper 95% CI
## 0.9345
##
## cmi_group3=3–6
## time n.risk n.event survival std.err lower 95% CI
## 12.0000 6.0000 15.0000 0.7619 0.0537 0.6637
## upper 95% CI
## 0.8747
##
## cmi_group3=≥6
## time n.risk n.event survival std.err lower 95% CI
## 12.000 3.000 1.000 0.889 0.105 0.706
## upper 95% CI
## 1.000
ggsurvplot(fit_cmi3,
conf.int = FALSE, pval = TRUE, risk.table = TRUE,
xlab = "Months since Transplant",
ylab = "Survival Probability",
title = "Survival by CMI Groups (<3, 3–6, ≥6)",
legend.title = "CMI Group",
break.time.by = 3, xlim = c(0,12))
fit_risk67 <- survfit(surv_obj~risk67,data=surv_data)
summary(fit_risk67,times=12)
## Call: survfit(formula = surv_obj ~ risk67, data = surv_data)
##
## 11 observations deleted due to missingness
## risk67=High (≥67)
## time n.risk n.event survival std.err lower 95% CI
## 12.0000 11.0000 25.0000 0.7706 0.0403 0.6956
## upper 95% CI
## 0.8538
##
## risk67=Low (<67)
## time n.risk n.event survival std.err lower 95% CI
## 12.0000 2.0000 6.0000 0.9250 0.0294 0.8690
## upper 95% CI
## 0.9846
ggsurvplot(fit_risk67,conf.int=FALSE,pval=TRUE,risk.table=TRUE,
xlab="Months",ylab="Survival",
title="Survival by Risk Score ≥67",
legend.title="Risk Group",
break.time.by=3,xlim=c(0,12))
fit_risk80 <- survfit(surv_obj~risk80,data=surv_data)
summary(fit_risk80,times=12)
## Call: survfit(formula = surv_obj ~ risk80, data = surv_data)
##
## 11 observations deleted due to missingness
## risk80=High (≥80)
## time n.risk n.event survival std.err lower 95% CI
## 12.0000 8.0000 13.0000 0.7679 0.0564 0.6649
## upper 95% CI
## 0.8868
##
## risk80=Low (<80)
## time n.risk n.event survival std.err lower 95% CI
## 12.0000 5.0000 18.0000 0.8647 0.0297 0.8084
## upper 95% CI
## 0.9248
ggsurvplot(fit_risk80,conf.int=FALSE,pval=TRUE,risk.table=TRUE,
xlab="Months",ylab="Survival",
title="Survival by Risk Score ≥80",
legend.title="Risk Group",
break.time.by=3,xlim=c(0,12))
data_los <- data_clean %>%
filter(!is.na(tx_date), !is.na(discharge_date),
!is.na(age_at_tx), !is.na(cmi)) %>%
mutate(tx_date=as.Date(tx_date,format="%m/%d/%Y"),
discharge_date=as.Date(discharge_date,format="%m/%d/%Y"),
LOS_days=as.numeric(difftime(discharge_date,tx_date,units="days")),
risk_score=age_at_tx+5*cmi,
LongStay=ifelse(LOS_days>30,1,0))
summary(data_los$LOS_days)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.00 18.00 20.00 24.83 26.00 100.00
table(data_los$LongStay)
##
## 0 1
## 154 30
ggplot(data_los, aes(x = LOS_days)) +
geom_histogram(binwidth = 5, fill = "skyblue", color = "black") +
labs(title = "Distribution of LOS",
x = "LOS (days)", y = "N")
fit_los <- lm(LOS_days ~ age_at_tx + cmi, data=data_los)
summary(fit_los)
##
## Call:
## lm(formula = LOS_days ~ age_at_tx + cmi, data = data_los)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.358 -6.517 -4.110 0.696 73.782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.661643 3.785091 5.987 1.12e-08 ***
## age_at_tx 0.008531 0.063175 0.135 0.893
## cmi 0.812862 0.516829 1.573 0.118
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.04 on 181 degrees of freedom
## Multiple R-squared: 0.01392, Adjusted R-squared: 0.003026
## F-statistic: 1.278 on 2 and 181 DF, p-value: 0.2812
data_los <- data_los %>%
mutate(age_decade=cut(age_at_tx,breaks=seq(0,100,10),right=FALSE))
ggplot(data_los, aes(x=age_decade,y=LOS_days))+
geom_boxplot(fill="skyblue",alpha=0.7)+
labs(title="LOS by Age Decade",x="Age Group",y="LOS (days)")
roc_los <- roc(LongStay ~ risk_score, data=data_los)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(roc_los)
## Area under the curve: 0.547
plot(roc_los,col="darkblue",lwd=2,
main="ROC: Risk Score Predicting LOS >30 Days")
abline(a=0,b=1,lty=2,col="grey")
best_cut_los <- coords(roc_los,"best",
ret=c("threshold","sensitivity","specificity"))
best_cut_los
## threshold sensitivity specificity
## 1 67.65443 0.7 0.4675325
data_los <- data_los %>%
mutate(RiskGroup80=ifelse(risk_score>=80,"High (≥80)","Low (<80)"))
tbl_los80 <- table(data_los$RiskGroup80,data_los$LongStay)
tbl_los80
##
## 0 1
## High (≥80) 44 9
## Low (<80) 110 21
if (nrow(tbl_los80)==2 && ncol(tbl_los80)==2) {
fisher.test(tbl_los80)
}
##
## Fisher's Exact Test for Count Data
##
## data: tbl_los80
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.3739103 2.5034470
## sample estimates:
## odds ratio
## 0.9336884
ggplot(data_los, aes(x=RiskGroup80, fill=factor(LongStay)))+
geom_bar(position="fill")+
scale_fill_manual(values=c("0"="seagreen","1"="tomato"),
labels=c("0"="≤30 days","1"=">30 days"))+
labs(title="LOS >30 Days by Risk Group (Cutoff=80)",
x="Risk Group",y="Proportion",fill="LOS Outcome")+
theme_minimal()
We worked backwards from a capacity of ~15 patients per year to determine a cutoff on the combined RiskScore = Age + 5×CMI.
# Build scored dataset
data_scored <- data_clean %>%
filter(!is.na(age_at_tx), !is.na(cmi)) %>%
mutate(RiskScore = age_at_tx + 5 * cmi,
Tx_Year = format(as.Date(tx_date), "%Y"))
# Candidate cutoffs from 60–120
score_cutoffs <- seq(60, 120, by = 5)
# Count how many patients per year exceed each cutoff
threshold_results <- lapply(score_cutoffs, function(cut) {
counts <- data_scored %>%
filter(RiskScore >= cut) %>%
group_by(Tx_Year) %>%
summarise(n_selected = n(), .groups = "drop") %>%
summarise(mean_per_year = mean(n_selected))
data.frame(Cutoff = cut, MeanPerYear = counts$mean_per_year)
})
threshold_results <- do.call(rbind, threshold_results)
threshold_results
## Cutoff MeanPerYear
## 1 60 33.25
## 2 65 28.75
## 3 70 25.00
## 4 75 18.75
## 5 80 14.00
## 6 85 7.50
## 7 90 3.75
## 8 95 2.25
## 9 100 1.25
## 10 105 1.00
## 11 110 1.00
## 12 115 NaN
## 13 120 NaN
ggplot(threshold_results, aes(x = Cutoff, y = MeanPerYear)) +
geom_line(color = "darkgreen") +
geom_point(size = 3, color = "darkgreen") +
geom_hline(yintercept = 15, linetype = "dashed", color = "red") +
labs(title = "Average Number of Patients per Year Selected by RiskScore Cutoff",
x = "RiskScore Cutoff",
y = "Mean Patients per Year") +
theme_minimal()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Apply cutoff = 80 for illustration
data_scatter <- data_scored %>%
mutate(Selected = ifelse(RiskScore >= 80, "Selected", "Not Selected"))
# Scatterplot of Age vs CMI, highlighting selected patients
p_risk <- ggplot(data_scatter, aes(x = age_at_tx, y = cmi)) +
geom_point(aes(color = Selected), alpha = 0.6) +
scale_color_manual(values = c("Not Selected" = "grey70", "Selected" = "purple")) +
labs(title = "Age vs CMI: Patients Selected for Geriatrics Evaluation (RiskScore ≥80)",
x = "Age at Transplant",
y = "Comorbidity Index (CMI)",
color = "Selection Status") +
theme_minimal()
p_risk
# Identifying Patients for Geriatrics Evaluation Using Age Only
We worked backwards from a capacity of ~15 patients per year to determine an age cutoff.
# Build dataset with age + year
data_age <- data_clean %>%
filter(!is.na(age_at_tx)) %>%
mutate(Tx_Year = format(as.Date(tx_date), "%Y"))
# Candidate age cutoffs from 50–80
age_cutoffs <- seq(50, 80, by = 1)
# Count how many patients per year exceed each cutoff
threshold_age_results <- lapply(age_cutoffs, function(cut) {
counts <- data_age %>%
filter(age_at_tx >= cut) %>%
group_by(Tx_Year) %>%
summarise(n_selected = n(), .groups = "drop") %>%
summarise(mean_per_year = mean(n_selected))
data.frame(Cutoff = cut, MeanPerYear = counts$mean_per_year)
})
threshold_age_results <- do.call(rbind, threshold_age_results)
threshold_age_results
## Cutoff MeanPerYear
## 1 50 35.00
## 2 51 34.50
## 3 52 34.25
## 4 53 33.50
## 5 54 33.00
## 6 55 32.50
## 7 56 31.50
## 8 57 30.75
## 9 58 30.25
## 10 59 29.75
## 11 60 29.25
## 12 61 27.25
## 13 62 25.50
## 14 63 24.50
## 15 64 23.00
## 16 65 21.50
## 17 66 20.00
## 18 67 18.25
## 19 68 16.50
## 20 69 14.75
## 21 70 11.00
## 22 71 7.50
## 23 72 5.75
## 24 73 4.25
## 25 74 3.25
## 26 75 1.75
## 27 76 1.50
## 28 77 1.25
## 29 78 1.00
## 30 79 NaN
## 31 80 NaN
ggplot(threshold_age_results, aes(x = Cutoff, y = MeanPerYear)) +
geom_line(color = "blue") +
geom_point(size = 3, color = "blue") +
geom_hline(yintercept = 15, linetype = "dashed", color = "red") +
labs(title = "Average Number of Patients per Year Selected by Age Cutoff",
x = "Age Cutoff (years)",
y = "Mean Patients per Year") +
theme_minimal()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Apply cutoff = 70 for illustration
data_age_scatter <- data_age %>%
mutate(Selected = ifelse(age_at_tx >= 70, "Selected", "Not Selected"))
# Scatterplot of Age vs CMI, highlighting patients ≥70
p_age <- ggplot(data_age_scatter, aes(x = age_at_tx, y = cmi)) +
geom_point(aes(color = Selected), alpha = 0.6) +
scale_color_manual(values = c("Not Selected" = "grey70", "Selected" = "blue")) +
labs(title = "Patients Selected for Geriatrics Evaluation (Age ≥70)",
x = "Age at Transplant",
y = "Comorbidity Index (CMI)",
color = "Selection Status") +
theme_minimal()
p_age
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).
We tested a combined rule: select patients if age_at_tx > 70 OR cmi > 5.
data_combo <- data_clean %>%
filter(!is.na(age_at_tx), !is.na(cmi)) %>%
mutate(
Tx_Year = format(as.Date(tx_date), "%Y"),
Selected = ifelse(age_at_tx > 70 | cmi > 5, "Selected", "Not Selected")
)
table(data_combo$Selected)
##
## Not Selected Selected
## 141 48
combo_per_year <- data_combo %>%
group_by(Tx_Year, Selected) %>%
summarise(n = n(), .groups = "drop") %>%
tidyr::pivot_wider(names_from = Selected, values_from = n, values_fill = 0)
combo_per_year
## # A tibble: 4 × 3
## Tx_Year `Not Selected` Selected
## <chr> <int> <int>
## 1 2022 34 15
## 2 2023 37 10
## 3 2024 45 15
## 4 2025 25 8
ggplot(combo_per_year, aes(x = Tx_Year, y = Selected)) +
geom_col(fill = "purple", alpha = 0.7) +
geom_text(aes(label = Selected), vjust = -0.3, size = 4) +
labs(title = "Number of Patients Selected (Age >70 OR CMI >5) per Year",
x = "Transplant Year",
y = "Number of Selected Patients") +
theme_minimal()
p_combo <- ggplot(data_combo, aes(x = age_at_tx, y = cmi)) +
geom_point(aes(color = Selected), alpha = 0.6) +
scale_color_manual(values = c("Not Selected" = "grey70", "Selected" = "purple")) +
labs(title = "Patients Selected for Geriatrics Evaluation (Age >70 OR CMI >5)",
x = "Age at Transplant",
y = "Comorbidity Index (CMI)",
color = "Selection Status") +
theme_minimal()
p_combo
We compared two rules for selecting patients for geriatrics
evaluation:
1. Age > 70
2. RiskScore ≥ 80 (Age + 5×CMI)
selected_age70 <- data_clean %>%
filter(!is.na(age_at_tx)) %>%
mutate(
Tx_Year = format(as.Date(tx_date), "%Y"),
Selected = ifelse(age_at_tx > 70, "Selected", "Not Selected")
) %>%
group_by(Tx_Year, Selected) %>%
summarise(n = n(), .groups = "drop") %>%
tidyr::pivot_wider(names_from = Selected, values_from = n, values_fill = 0)
selected_age70
## # A tibble: 4 × 3
## Tx_Year `Not Selected` Selected
## <chr> <int> <int>
## 1 2022 40 14
## 2 2023 40 8
## 3 2024 47 13
## 4 2025 29 9
selected_score80 <- data_clean %>%
filter(!is.na(age_at_tx), !is.na(cmi)) %>%
mutate(
RiskScore = age_at_tx + 5 * cmi,
Tx_Year = format(as.Date(tx_date), "%Y"),
Selected = ifelse(RiskScore >= 80, "Selected", "Not Selected")
) %>%
group_by(Tx_Year, Selected) %>%
summarise(n = n(), .groups = "drop") %>%
tidyr::pivot_wider(names_from = Selected, values_from = n, values_fill = 0)
selected_score80
## # A tibble: 4 × 3
## Tx_Year `Not Selected` Selected
## <chr> <int> <int>
## 1 2022 33 16
## 2 2023 33 14
## 3 2024 45 15
## 4 2025 22 11
We compared two rules for selecting patients for geriatrics
evaluation:
1. Age > 70
2. RiskScore ≥ 80 (Age + 5×CMI)
selected_age70 <- data_clean %>%
filter(!is.na(age_at_tx)) %>%
mutate(
Tx_Year = format(as.Date(tx_date), "%Y"),
Selected = ifelse(age_at_tx > 70, "Selected", "Not Selected")
) %>%
group_by(Tx_Year, Selected) %>%
summarise(n = n(), .groups = "drop") %>%
tidyr::pivot_wider(names_from = Selected, values_from = n, values_fill = 0)
selected_age70
## # A tibble: 4 × 3
## Tx_Year `Not Selected` Selected
## <chr> <int> <int>
## 1 2022 40 14
## 2 2023 40 8
## 3 2024 47 13
## 4 2025 29 9
selected_score80 <- data_clean %>%
filter(!is.na(age_at_tx), !is.na(cmi)) %>%
mutate(
RiskScore = age_at_tx + 5 * cmi,
Tx_Year = format(as.Date(tx_date), "%Y"),
Selected = ifelse(RiskScore >= 80, "Selected", "Not Selected")
) %>%
group_by(Tx_Year, Selected) %>%
summarise(n = n(), .groups = "drop") %>%
tidyr::pivot_wider(names_from = Selected, values_from = n, values_fill = 0)
selected_score80
## # A tibble: 4 × 3
## Tx_Year `Not Selected` Selected
## <chr> <int> <int>
## 1 2022 33 16
## 2 2023 33 14
## 3 2024 45 15
## 4 2025 22 11
# Rule 1: Age >70
selected_age70 <- data_clean %>%
filter(!is.na(age_at_tx)) %>%
mutate(
Tx_Year = format(as.Date(tx_date), "%Y"),
Rule = "Age >70",
Selected = ifelse(age_at_tx > 70, 1, 0)
) %>%
group_by(Tx_Year, Rule) %>%
summarise(n_selected = sum(Selected), .groups = "drop")
# Rule 2: RiskScore ≥80
selected_score80 <- data_clean %>%
filter(!is.na(age_at_tx), !is.na(cmi)) %>%
mutate(
RiskScore = age_at_tx + 5 * cmi,
Tx_Year = format(as.Date(tx_date), "%Y"),
Rule = "RiskScore ≥80",
Selected = ifelse(RiskScore >= 80, 1, 0)
) %>%
group_by(Tx_Year, Rule) %>%
summarise(n_selected = sum(Selected), .groups = "drop")
# Rule 3: Age >70 OR CMI >5
selected_combo <- data_clean %>%
filter(!is.na(age_at_tx), !is.na(cmi)) %>%
mutate(
Tx_Year = format(as.Date(tx_date), "%Y"),
Rule = "Age >70 OR CMI >5",
Selected = ifelse(age_at_tx > 70 | cmi > 5, 1, 0)
) %>%
group_by(Tx_Year, Rule) %>%
summarise(n_selected = sum(Selected), .groups = "drop")
# Combine all
selected_all <- bind_rows(selected_age70, selected_score80, selected_combo)
selected_all
## # A tibble: 12 × 3
## Tx_Year Rule n_selected
## <chr> <chr> <dbl>
## 1 2022 Age >70 14
## 2 2023 Age >70 8
## 3 2024 Age >70 13
## 4 2025 Age >70 9
## 5 2022 RiskScore ≥80 16
## 6 2023 RiskScore ≥80 14
## 7 2024 RiskScore ≥80 15
## 8 2025 RiskScore ≥80 11
## 9 2022 Age >70 OR CMI >5 15
## 10 2023 Age >70 OR CMI >5 10
## 11 2024 Age >70 OR CMI >5 15
## 12 2025 Age >70 OR CMI >5 8
ggplot(selected_all, aes(x = Tx_Year, y = n_selected, color = Rule, group = Rule)) +
geom_line(size = 1.2) +
geom_point(size = 3) +
geom_hline(yintercept = 15, linetype = "dashed", color = "red") +
labs(title = "Number of Patients Selected per Year by Rule",
x = "Transplant Year",
y = "Number of Patients Selected",
color = "Selection Rule") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Average Number of Patients per Year (Summary)
summary_table <- selected_all %>%
group_by(Rule) %>%
summarise(
Avg_per_year = round(mean(n_selected), 1),
Min_per_year = min(n_selected),
Max_per_year = max(n_selected),
.groups = "drop"
)
summary_table
## # A tibble: 3 × 4
## Rule Avg_per_year Min_per_year Max_per_year
## <chr> <dbl> <dbl> <dbl>
## 1 Age >70 11 8 14
## 2 Age >70 OR CMI >5 12 8 15
## 3 RiskScore ≥80 14 11 16
surv_data_age70 <- surv_data %>%
mutate(Group_Age70 = ifelse(age_at_tx > 70, "Age >70", "Age ≤70"))
fit_age70 <- survfit(surv_obj ~ Group_Age70, data = surv_data_age70)
ggsurvplot(fit_age70,
conf.int = FALSE, pval = TRUE, risk.table = TRUE,
xlab = "Months since Transplant", ylab = "Survival Probability",
title = "Survival by Age >70",
legend.title = "Group",
break.time.by = 3, xlim = c(0,12))
surv_data_score80 <- surv_data %>%
mutate(Group_Score80 = ifelse(risk_score >= 80, "RiskScore ≥80", "RiskScore <80"))
fit_score80 <- survfit(surv_obj ~ Group_Score80, data = surv_data_score80)
ggsurvplot(fit_score80,
conf.int = FALSE, pval = TRUE, risk.table = TRUE,
xlab = "Months since Transplant", ylab = "Survival Probability",
title = "Survival by RiskScore ≥80",
legend.title = "Group",
break.time.by = 3, xlim = c(0,12))
surv_data_combo <- surv_data %>%
mutate(Group_Combo = ifelse(age_at_tx > 70 | cmi > 5,
"Age >70 OR CMI >5", "Neither"))
fit_combo <- survfit(surv_obj ~ Group_Combo, data = surv_data_combo)
ggsurvplot(fit_combo,
conf.int = FALSE, pval = TRUE, risk.table = TRUE,
xlab = "Months since Transplant", ylab = "Survival Probability",
title = "Survival by Age >70 OR CMI >5",
legend.title = "Group",
break.time.by = 3, xlim = c(0,12))