library(readr)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(gt)
library(ggsignif)
library(readxl)
library(survminer)
library(survival)
library(table1)
library(kableExtra)
#Loading data and generating relevant 2-year variables
EZH2 <- read_excel("~/Downloads/Research/Clinical Research/Dr. Landsburg/Vincent new.xlsx") %>%
mutate(
pfs_2yr = pmin(survtimep, 24),
pfs_status_2yr = ifelse(survtimep <= 24, survstatusp, 0),
pfs_5yr = pmin(survtimep, 60),
pfs_status_5yr = ifelse(survtimep <= 60, survstatusp, 0),
os_2yr = pmin(survtimeo, 24),
os_status_2yr = as.numeric(ifelse(survtimeo <= 24, survstatuso, 0)),
os_5yr = pmin(survtimeo, 60),
os_status_5yr = as.numeric(ifelse(survtimeo <= 60, survstatuso, 0))
)
## New names:
## • `EZH2` -> `EZH2...12`
## • `TP53` -> `TP53...13`
## • `EZH2` -> `EZH2...16`
## • `TP53` -> `TP53...17`
#List
features <- c("ipi3", "COO", "DEL", "FISH-MYC", "TP53...13")
features_names <- c("International Prognosis Index", "Cell of Origin (Germinal Center)", "Double Expressor Lymphoma", "FISH-MYC Rearrangment", "TP53 Mutation")
#Cox Regression Table Clean Up
clean_up <- function(data){
data %>%
rownames_to_column(var = "Characteristic") %>%
mutate(across(c(`Hazard Ratio`, `95% Confidence Interval (Low)`, `95% Confidence Interval (High)`), ~ round(.x, 2)),
`P Value` = formatC(`P Value`, format = "e", digits = 2),
Characteristic = features_names)
}
# Basic Function to perform Cox regression
perform_cox <- function(time, status, feature) {
cox_model <- coxph(Surv(time, status) ~ get(feature), data = EZH2)
summary_cox <- summary(cox_model)
hr <- summary_cox$coef[1, 2]
ci_low <- summary_cox$conf.int[1, 3]
ci_high <- summary_cox$conf.int[1, 4]
p_value <- summary_cox$coef[1, 5]
return(c(`Hazard Ratio` = hr, `95% Confidence Interval (Low)` = ci_low, `95% Confidence Interval (High)` = ci_high, `P Value` = p_value))
}
#Fill specific functions
perform_cox_os2 <- function(feature) {
perform_cox(time = EZH2$os_2yr, status = EZH2$os_status_2yr, feature = feature)
}
perform_cox_pfs2 <- function(feature) {
perform_cox(time = EZH2$pfs_2yr, status = EZH2$pfs_status_2yr, feature = feature)
}
perform_cox_os5 <- function(feature) {
perform_cox(time = EZH2$os_5yr, status = EZH2$os_status_5yr, feature = feature)
}
perform_cox_pfs5 <- function(feature) {
perform_cox(time = EZH2$pfs_5yr, status = EZH2$pfs_status_5yr, feature = feature)
}
# Perform Cox regression
clean_up(as.data.frame(t(sapply(features, perform_cox_os2)))) %>%
kbl(caption = "Univariate Cox Regression (OS - 2 Years)") %>%
kable_classic("striped", full_width = F)
Univariate Cox Regression (OS - 2 Years)
|
Characteristic
|
Hazard Ratio
|
95% Confidence Interval (Low)
|
95% Confidence Interval (High)
|
P Value
|
|
International Prognosis Index
|
2.26
|
1.29
|
3.95
|
4.40e-03
|
|
Cell of Origin (Germinal Center)
|
0.71
|
0.40
|
1.26
|
2.45e-01
|
|
Double Expressor Lymphoma
|
2.28
|
1.29
|
4.01
|
4.41e-03
|
|
FISH-MYC Rearrangment
|
0.91
|
0.13
|
6.58
|
9.24e-01
|
|
TP53 Mutation
|
2.10
|
1.16
|
3.82
|
1.50e-02
|
clean_up(as.data.frame(t(sapply(features, perform_cox_pfs2)))) %>%
kbl(caption = "Univariate Cox Regression (PFS - 2 Years)") %>%
kable_classic("striped", full_width = F)
Univariate Cox Regression (PFS - 2 Years)
|
Characteristic
|
Hazard Ratio
|
95% Confidence Interval (Low)
|
95% Confidence Interval (High)
|
P Value
|
|
International Prognosis Index
|
1.86
|
1.27
|
2.71
|
1.36e-03
|
|
Cell of Origin (Germinal Center)
|
0.72
|
0.49
|
1.05
|
9.12e-02
|
|
Double Expressor Lymphoma
|
2.21
|
1.51
|
3.24
|
4.96e-05
|
|
FISH-MYC Rearrangment
|
1.70
|
0.63
|
4.61
|
2.99e-01
|
|
TP53 Mutation
|
1.66
|
1.08
|
2.53
|
2.00e-02
|
clean_up(as.data.frame(t(sapply(features, perform_cox_os5)))) %>%
kbl(caption = "Univariate Cox Regression (OS - 5 Years)") %>%
kable_classic("striped", full_width = F)
Univariate Cox Regression (OS - 5 Years)
|
Characteristic
|
Hazard Ratio
|
95% Confidence Interval (Low)
|
95% Confidence Interval (High)
|
P Value
|
|
International Prognosis Index
|
2.14
|
1.30
|
3.52
|
2.69e-03
|
|
Cell of Origin (Germinal Center)
|
0.61
|
0.36
|
1.02
|
6.14e-02
|
|
Double Expressor Lymphoma
|
2.40
|
1.46
|
3.96
|
5.90e-04
|
|
FISH-MYC Rearrangment
|
1.43
|
0.35
|
5.87
|
6.16e-01
|
|
TP53 Mutation
|
1.85
|
1.07
|
3.20
|
2.78e-02
|
clean_up(as.data.frame(t(sapply(features, perform_cox_pfs5)))) %>%
kbl(caption = "Univariate Cox Regression (PFS - 5 Years)") %>%
kable_classic("striped", full_width = F)
Univariate Cox Regression (PFS - 5 Years)
|
Characteristic
|
Hazard Ratio
|
95% Confidence Interval (Low)
|
95% Confidence Interval (High)
|
P Value
|
|
International Prognosis Index
|
1.72
|
1.21
|
2.45
|
2.42e-03
|
|
Cell of Origin (Germinal Center)
|
0.63
|
0.44
|
0.91
|
1.24e-02
|
|
Double Expressor Lymphoma
|
2.08
|
1.45
|
2.96
|
5.91e-05
|
|
FISH-MYC Rearrangment
|
1.42
|
0.52
|
3.84
|
4.91e-01
|
|
TP53 Mutation
|
1.45
|
0.97
|
2.19
|
7.32e-02
|
#Multivariate Cox Regression
pfs_multi <- coxph(Surv(pfs_2yr, pfs_status_2yr) ~ ipi3 + DEL + `TP53...13`, data = EZH2)
summary(pfs_multi)
## Call:
## coxph(formula = Surv(pfs_2yr, pfs_status_2yr) ~ ipi3 + DEL +
## TP53...13, data = EZH2)
##
## n= 412, number of events= 109
##
## coef exp(coef) se(coef) z Pr(>|z|)
## ipi3 0.4864 1.6264 0.1961 2.480 0.013132 *
## DEL 0.7176 2.0494 0.1977 3.630 0.000283 ***
## TP53...13 0.4414 1.5549 0.2179 2.026 0.042757 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## ipi3 1.626 0.6148 1.107 2.389
## DEL 2.049 0.4879 1.391 3.019
## TP53...13 1.555 0.6431 1.015 2.383
##
## Concordance= 0.636 (se = 0.025 )
## Likelihood ratio test= 26.04 on 3 df, p=9e-06
## Wald test = 27.92 on 3 df, p=4e-06
## Score (logrank) test = 29.18 on 3 df, p=2e-06
os_multi <- coxph(Surv(os_2yr, os_status_2yr) ~ ipi3 + DEL + `TP53...13`, data = EZH2)
summary(os_multi)
## Call:
## coxph(formula = Surv(os_2yr, os_status_2yr) ~ ipi3 + DEL + TP53...13,
## data = EZH2)
##
## n= 412, number of events= 49
##
## coef exp(coef) se(coef) z Pr(>|z|)
## ipi3 0.6612 1.9371 0.2909 2.273 0.0230 *
## DEL 0.6920 1.9977 0.2928 2.363 0.0181 *
## TP53...13 0.6474 1.9106 0.3062 2.114 0.0345 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## ipi3 1.937 0.5162 1.095 3.426
## DEL 1.998 0.5006 1.125 3.546
## TP53...13 1.911 0.5234 1.048 3.482
##
## Concordance= 0.675 (se = 0.035 )
## Likelihood ratio test= 17.53 on 3 df, p=6e-04
## Wald test = 18.79 on 3 df, p=3e-04
## Score (logrank) test = 19.92 on 3 df, p=2e-04
#Survival Fits
OS_fit <- survfit(Surv(survtimeo, as.numeric(survstatuso)) ~ newipi012, data = EZH2)
PFS_fit <- survfit(Surv(survtimep, as.numeric(survstatusp)) ~ newipi012, data = EZH2)
ggsurvplot(PFS_fit,
data = EZH2,
legend = "bottom",
legend.title = "IPI",
legend.labs = c("0", "1", "2"),
pval = TRUE,
ggtheme = theme_bw()) +
xlab("Time (Month)") +
ylab("PFS Probability")

ggsurvplot(OS_fit,
data = EZH2,
legend = "bottom",
legend.title = "IPI",
legend.labs = c("0", "1", "2"),
pval = TRUE,
ggtheme = theme_bw()) +
xlab("Time (Month)") +
ylab("OS Probability")
