library(readr)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(gt)
library(ggsignif)
library(readxl)
library(survminer)
library(survival)
library(table1)
library(kableExtra)
library(nricens)
#Loading data and generating relevant 2-year variables
EZH2 <- read_excel("/Users/vni/Downloads/Research/Clinical Research/Dr. Landsburg/Vincent new2.xlsx") %>%
  mutate(
    pfs_2yr = pmin(survtimep, 24),
    pfs_status_2yr = ifelse(survtimep <= 24, survstatusp, 0),
    os_2yr = pmin(survtimeo, 24),
    os_status_2yr = as.numeric(ifelse(survtimeo <= 24, survstatuso, 0))) %>%
  mutate(ipi3 == as.numeric(ipi3)) %>%
  mutate(newipi012 = case_when(
    DEL == 1 | ipi3 == 1 ~ 2,  
    TP53...14 == 1 & (DEL != 1 & ipi3 != 1) ~ 1, 
    DEL == 0 & ipi3 == 0 & TP53...14 == 0 ~ 0    
  ))

#List
features <- c("ipi3", "COO", "DEL", "FISH-MYC", "TP53...14")
features_names <- c("International Prognosis Index", "Cell of Origin (Germinal Center)", "Double Expressor Lymphoma", "FISH-MYC Rearrangment", "TP53 Mutation")

#Cox Regression Table Clean Up
clean_up <- function(data){
  data %>%
    rownames_to_column(var = "Characteristic") %>%
    mutate(across(c(`Hazard Ratio`, `95% Confidence Interval (Low)`, `95% Confidence Interval (High)`), ~ round(.x, 2)),
           `P Value` = formatC(`P Value`, format = "e", digits = 2),
           Characteristic = features_names)
}

# Basic Function to perform Cox regression
perform_cox <- function(time, status, feature) {
  cox_model <- coxph(Surv(time, status) ~ get(feature), data = EZH2)
  summary_cox <- summary(cox_model)
  
  hr <- summary_cox$coef[1, 2]
  ci_low <- summary_cox$conf.int[1, 3]
  ci_high <- summary_cox$conf.int[1, 4]
  p_value <- summary_cox$coef[1, 5]
  
  return(c(`Hazard Ratio` = hr, `95% Confidence Interval (Low)` = ci_low, `95% Confidence Interval (High)` = ci_high, `P Value` = p_value))
}

#Survival Objects
pfs_object <- Surv(EZH2$pfs_2yr, EZH2$pfs_status_2yr)
os_object <- Surv(EZH2$os_2yr, EZH2$os_status_2yr)
#Fill specific functions
perform_cox_os2 <- function(feature) {
  perform_cox(time = EZH2$os_2yr, status = EZH2$os_status_2yr, feature = feature)
}

perform_cox_pfs2 <- function(feature) {
  perform_cox(time = EZH2$pfs_2yr, status = EZH2$pfs_status_2yr, feature = feature)
}

# Perform Cox regression
clean_up(as.data.frame(t(sapply(features, perform_cox_os2)))) %>%
  kbl(caption = "Univariate Cox Regression (OS - 2 Years)") %>%
  kable_classic("striped", full_width = F)
Univariate Cox Regression (OS - 2 Years)
Characteristic Hazard Ratio 95% Confidence Interval (Low) 95% Confidence Interval (High) P Value
International Prognosis Index 2.26 1.29 3.95 4.40e-03
Cell of Origin (Germinal Center) 0.71 0.40 1.26 2.45e-01
Double Expressor Lymphoma 2.28 1.29 4.01 4.41e-03
FISH-MYC Rearrangment 0.91 0.13 6.58 9.24e-01
TP53 Mutation 2.10 1.16 3.82 1.50e-02
clean_up(as.data.frame(t(sapply(features, perform_cox_pfs2)))) %>%
  kbl(caption = "Univariate Cox Regression (PFS - 2 Years)") %>%
  kable_classic("striped", full_width = F)
Univariate Cox Regression (PFS - 2 Years)
Characteristic Hazard Ratio 95% Confidence Interval (Low) 95% Confidence Interval (High) P Value
International Prognosis Index 1.86 1.27 2.71 1.36e-03
Cell of Origin (Germinal Center) 0.72 0.49 1.05 9.12e-02
Double Expressor Lymphoma 2.21 1.51 3.24 4.96e-05
FISH-MYC Rearrangment 1.70 0.63 4.61 2.99e-01
TP53 Mutation 1.66 1.08 2.53 2.00e-02
#Multivariate Cox Regression
pfs_multi <- coxph(pfs_object ~ ipi3 + DEL + `TP53...14`, data = EZH2)
summary(pfs_multi)
## Call:
## coxph(formula = pfs_object ~ ipi3 + DEL + TP53...14, data = EZH2)
## 
##   n= 412, number of events= 109 
## 
##             coef exp(coef) se(coef)     z Pr(>|z|)    
## ipi31     0.4864    1.6264   0.1961 2.480 0.013132 *  
## DEL       0.7176    2.0494   0.1977 3.630 0.000283 ***
## TP53...14 0.4414    1.5549   0.2179 2.026 0.042757 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## ipi31         1.626     0.6148     1.107     2.389
## DEL           2.049     0.4879     1.391     3.019
## TP53...14     1.555     0.6431     1.015     2.383
## 
## Concordance= 0.636  (se = 0.025 )
## Likelihood ratio test= 26.04  on 3 df,   p=9e-06
## Wald test            = 27.92  on 3 df,   p=4e-06
## Score (logrank) test = 29.18  on 3 df,   p=2e-06
os_multi <- coxph(os_object ~ ipi3 + DEL + `TP53...14`, data = EZH2)
summary(os_multi)
## Call:
## coxph(formula = os_object ~ ipi3 + DEL + TP53...14, data = EZH2)
## 
##   n= 412, number of events= 49 
## 
##             coef exp(coef) se(coef)     z Pr(>|z|)  
## ipi31     0.6612    1.9371   0.2909 2.273   0.0230 *
## DEL       0.6920    1.9977   0.2928 2.363   0.0181 *
## TP53...14 0.6474    1.9106   0.3062 2.114   0.0345 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## ipi31         1.937     0.5162     1.095     3.426
## DEL           1.998     0.5006     1.125     3.546
## TP53...14     1.911     0.5234     1.048     3.482
## 
## Concordance= 0.675  (se = 0.035 )
## Likelihood ratio test= 17.53  on 3 df,   p=6e-04
## Wald test            = 18.79  on 3 df,   p=3e-04
## Score (logrank) test = 19.92  on 3 df,   p=2e-04
#Survival Fits (newipi012)
OS_fit <- survfit(Surv(survtimeo, as.numeric(survstatuso)) ~ newipi012, data = EZH2)
PFS_fit <- survfit(Surv(survtimep, as.numeric(survstatusp)) ~ newipi012, data = EZH2)

ggsurvplot(PFS_fit, 
           data = EZH2,
           legend = "bottom",
           legend.title = "IPI",
           legend.labs = c("0", "1", "2"),
           pval = TRUE,
           ggtheme = theme_bw()) +
  xlab("Time (Month)") +
  ylab("PFS Probability")

ggsurvplot(OS_fit, 
           data = EZH2,
           legend = "bottom",
           legend.title = "IPI",
           legend.labs = c("0", "1", "2"),
           pval = TRUE,
           ggtheme = theme_bw()) +
  xlab("Time (Month)") +
  ylab("OS Probability")

#Breakdown
prop.table(table(EZH2$newipi012))
## 
##          0          1          2 
## 0.43689320 0.09466019 0.46844660
prop.table(table(EZH2$ripi))
## 
##         0         1         2 
## 0.1650485 0.5145631 0.3203883
EZH2 %>%
  group_by(newipi012) %>%
  summarise(percent_PFS_2yr_1 = 100 - mean(pfs_status_2yr == 1, na.rm = TRUE) * 100,
            percent_OS_2yr_1 = 100 - mean(os_status_2yr == 1, na.rm = TRUE) * 100) %>%
  mutate(across(where(is.numeric), ~round(., 2))) %>%
  kbl(caption = "By newipi012") %>%
  kable_classic("striped", full_width = F)
By newipi012
newipi012 percent_PFS_2yr_1 percent_OS_2yr_1
0 85.00 96.11
1 71.79 87.18
2 63.21 80.83
EZH2 %>%
  group_by(ripi) %>%
  summarise(percent_PFS_2yr_1 = 100 - mean(pfs_status_2yr == 1, na.rm = TRUE) * 100,
            percent_OS_2yr_1 = 100 - mean(os_status_2yr == 1, na.rm = TRUE) * 100) %>%
  mutate(across(where(is.numeric), ~round(., 2))) %>%
  kbl(caption = "By ripi") %>%
  kable_classic("striped", full_width = F)
By ripi
ripi percent_PFS_2yr_1 percent_OS_2yr_1
0 91.18 98.53
1 75.00 89.62
2 62.12 80.30
#General C-Index and AIC#
# Fit Cox proportional hazards models
ripi_pfs <- coxph(pfs_object ~ ripi, data = EZH2)
newipi012_pfs <- coxph(pfs_object ~ newipi012, data = EZH2)
ripi_os <- coxph(os_object ~ ripi, data = EZH2)
newipi012_os <- coxph(os_object ~ newipi012, data = EZH2)

#Creating Cox Model Objects
list_cox <- list(ripi_pfs, newipi012_pfs, ripi_os, newipi012_os)
model_names <- c("PFS by RIPI", "PFS by New IPI 012", "OS by RIPI", "PFS by New IPI 012")

#Generating summary of AIC and C-Index
c_indices <- numeric(length(list_cox))
aic_values <- numeric(length(list_cox))

for (i in seq_along(list_cox)) {
  model <- list_cox[[i]]
  c_index <- summary(model)$concordance[1]
  c_indices[i] <- c_index
  aic_values[i] <- AIC(model)
}

c_index_AIC_summary <- data.frame(model_names,
                                  c_indices,
                                  aic_values) %>%
  mutate(across(c(c_indices,
                  aic_values), 
                ~ round(.x, 2))) %>%
  rename(`Predictor Model` = model_names,
         `Concordance Index` = c_indices,
         `Akaike Information Criterion` = aic_values)

c_index_AIC_summary %>%
  kbl(caption = "Concordance Index and Akaike Information Criterion by Predictor Model") %>%
  kable_classic("striped", full_width = F)
Concordance Index and Akaike Information Criterion by Predictor Model
Predictor Model Concordance Index Akaike Information Criterion
PFS by RIPI 0.61 1256.38
PFS by New IPI 012 0.62 1252.77
OS by RIPI 0.65 562.92
PFS by New IPI 012 0.67 556.29
#Net Reclassification Index
#Defining cut points for "0", "1", and "2"
cut_points <- c(-Inf, 0.5, 1.5, Inf)

#Calculating NRI
nri_result_os <- nricens(time = EZH2$os_2yr, 
                         event = EZH2$os_status_2yr, 
                         p.std = EZH2$ripi, 
                         p.new = EZH2$newipi012, 
                         t0 = 24, 
                         cut = cut_points,
                         updown = "category")

nri_result_pfs <- nricens(time = EZH2$pfs_2yr, 
                          event = EZH2$pfs_status_2yr, 
                          p.std = EZH2$ripi, 
                          p.new = EZH2$newipi012, 
                          t0 = 24, 
                          cut = cut_points,
                          updown = "category")

nri_os <- nri_result_os[["nri"]] %>%
  mutate(across(where(is.numeric), ~round(., 2)))

nri_pfs <- nri_result_pfs[["nri"]] %>%
  mutate(across(where(is.numeric), ~round(., 2)))
nri_os %>%
  kbl(caption = "Net Reclassification Index (OS)") %>%
  kable_classic("striped", full_width = F)
Net Reclassification Index (OS)
Estimate Lower Upper
NRI 0.30 0.10 0.50
NRI+ 0.13 -0.05 0.30
NRI- 0.17 0.10 0.24
Pr(Up|Case) 0.26 0.14 0.40
Pr(Down|Case) 0.14 0.05 0.24
Pr(Down|Ctrl) 0.35 0.30 0.40
Pr(Up|Ctrl) 0.17 0.13 0.21
nri_pfs %>%
  kbl(caption = "Net Reclassification Index (PFS)") %>%
  kable_classic("striped", full_width = F)
Net Reclassification Index (PFS)
Estimate Lower Upper
NRI 0.18 0.03 0.33
NRI+ -0.01 -0.13 0.10
NRI- 0.18 0.11 0.26
Pr(Up|Case) 0.20 0.13 0.28
Pr(Down|Case) 0.21 0.14 0.29
Pr(Down|Ctrl) 0.36 0.31 0.42
Pr(Up|Ctrl) 0.18 0.13 0.22
#Stratified Log Rank
EZH2$newipi012f <- factor(EZH2$newipi012, levels = c(0, 1, 2), labels = c("Low Risk", "Medium Risk", "High Risk"))

pfs_strat <- survdiff(Surv(survtimep, as.numeric(survstatusp)) ~ newipi012 +strata(ripi), data = EZH2)
os_strat <- survdiff(Surv(survtimeo, as.numeric(survstatuso)) ~ newipi012 +strata(ripi), data = EZH2)

print(pfs_strat)
## Call:
## survdiff(formula = Surv(survtimep, as.numeric(survstatusp)) ~ 
##     newipi012 + strata(ripi), data = EZH2)
## 
##               N Observed Expected (O-E)^2/E (O-E)^2/V
## newipi012=0 180       39    51.83     3.176     8.744
## newipi012=1  39       12     9.25     0.819     0.922
## newipi012=2 193       79    68.92     1.474     6.407
## 
##  Chisq= 8.9  on 2 degrees of freedom, p= 0.01
print(os_strat)
## Call:
## survdiff(formula = Surv(survtimeo, as.numeric(survstatuso)) ~ 
##     newipi012 + strata(ripi), data = EZH2)
## 
##               N Observed Expected (O-E)^2/E (O-E)^2/V
## newipi012=0 180       11    21.75     5.313    13.788
## newipi012=1  39        6     4.41     0.577     0.653
## newipi012=2 193       46    36.84     2.275    11.629
## 
##  Chisq= 14.7  on 2 degrees of freedom, p= 7e-04
# Split the data by strata
strata_levels <- unique(EZH2$ripi)
p_values <- data.frame(Stratum = character(), `P Value (OS)` = numeric(), `P Value (PFS)` = numeric(), stringsAsFactors = FALSE)

for (stratum in strata_levels) {
  subset_data <- subset(EZH2, ripi == stratum)
  
  surv_object_pfs <- Surv(subset_data$pfs_2yr, subset_data$pfs_status_2yr)
  surv_object_os <- Surv(subset_data$os_2yr, subset_data$os_status_2yr)
  
  logrank_test_pfs <- survdiff(surv_object_pfs ~ newipi012, data = subset_data)
  logrank_test_os <- survdiff(surv_object_os ~ newipi012, data = subset_data)
  
  p_value_pfs <- 1 - pchisq(logrank_test_pfs$chisq, length(logrank_test_pfs$n) - 1)
  p_value_os <- 1 - pchisq(logrank_test_os$chisq, length(logrank_test_os$n) - 1)
  
  p_values <- rbind(p_values, data.frame(Stratum = stratum, `P Value (OS)` = p_value_os, `P Value (PFS)` = p_value_pfs))
}

p_values %>%
  rename(`ripi Stratum` = Stratum,
         `P Value (OS)` = P.Value..OS.,
         `P Value (PFS)` = P.Value..PFS.) %>%
  mutate(across(where(is.numeric), ~round(., 3))) %>%
  kbl(caption = "") %>%
  kable_classic("striped", full_width = F)
ripi Stratum P Value (OS) P Value (PFS)
1 0.004 0.008
0 0.897 0.678
2 0.354 0.271