library(readr)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(gt)
library(ggsignif)
library(readxl)
library(survminer)
library(survival)
library(table1)
library(kableExtra)
#Loading data and generating relevant 2-year variables
EZH2 <- read_excel("~/Downloads/Research/Clinical Research/Dr. Landsburg/Vincent new.xlsx") %>%
  mutate(
    pfs_2yr = pmin(survtimep, 24),
    pfs_status_2yr = ifelse(survtimep <= 24, survstatusp, 0),
    pfs_5yr = pmin(survtimep, 60),
    pfs_status_5yr = ifelse(survtimep <= 60, survstatusp, 0),
    os_2yr = pmin(survtimeo, 24),
    os_status_2yr = as.numeric(ifelse(survtimeo <= 24, survstatuso, 0)),
    os_5yr = pmin(survtimeo, 60),
    os_status_5yr = as.numeric(ifelse(survtimeo <= 60, survstatuso, 0))
  )
## New names:
## • `EZH2` -> `EZH2...12`
## • `TP53` -> `TP53...13`
## • `EZH2` -> `EZH2...16`
## • `TP53` -> `TP53...17`
#List
features <- c("ipi3", "COO", "DEL", "FISH-MYC", "TP53...13")
features_names <- c("International Prognosis Index", "Cell of Origin (Germinal Center)", "Double Expressor Lymphoma", "FISH-MYC Rearrangment", "TP53 Mutation")

#Cox Regression Table Clean Up
clean_up <- function(data){
  data %>%
    rownames_to_column(var = "Characteristic") %>%
    mutate(across(c(`Hazard Ratio`, `95% Confidence Interval (Low)`, `95% Confidence Interval (High)`), ~ round(.x, 2)),
           `P Value` = formatC(`P Value`, format = "e", digits = 2),
           Characteristic = features_names)
}

# Basic Function to perform Cox regression
perform_cox <- function(time, status, feature) {
  cox_model <- coxph(Surv(time, status) ~ get(feature), data = EZH2)
  summary_cox <- summary(cox_model)
  
  hr <- summary_cox$coef[1, 2]
  ci_low <- summary_cox$conf.int[1, 3]
  ci_high <- summary_cox$conf.int[1, 4]
  p_value <- summary_cox$coef[1, 5]
  
  return(c(`Hazard Ratio` = hr, `95% Confidence Interval (Low)` = ci_low, `95% Confidence Interval (High)` = ci_high, `P Value` = p_value))
}
#Fill specific functions
perform_cox_os2 <- function(feature) {
  perform_cox(time = EZH2$os_2yr, status = EZH2$os_status_2yr, feature = feature)
}

perform_cox_pfs2 <- function(feature) {
  perform_cox(time = EZH2$pfs_2yr, status = EZH2$pfs_status_2yr, feature = feature)
}

perform_cox_os5 <- function(feature) {
  perform_cox(time = EZH2$os_5yr, status = EZH2$os_status_5yr, feature = feature)
}

perform_cox_pfs5 <- function(feature) {
  perform_cox(time = EZH2$pfs_5yr, status = EZH2$pfs_status_5yr, feature = feature)
}

# Perform Cox regression
clean_up(as.data.frame(t(sapply(features, perform_cox_os2)))) %>%
  kbl(caption = "Univariate Cox Regression (OS - 2 Years)") %>%
  kable_classic("striped", full_width = F)
Univariate Cox Regression (OS - 2 Years)
Characteristic Hazard Ratio 95% Confidence Interval (Low) 95% Confidence Interval (High) P Value
International Prognosis Index 2.26 1.29 3.95 4.40e-03
Cell of Origin (Germinal Center) 0.71 0.40 1.26 2.45e-01
Double Expressor Lymphoma 2.28 1.29 4.01 4.41e-03
FISH-MYC Rearrangment 0.91 0.13 6.58 9.24e-01
TP53 Mutation 2.10 1.16 3.82 1.50e-02
clean_up(as.data.frame(t(sapply(features, perform_cox_pfs2)))) %>%
  kbl(caption = "Univariate Cox Regression (PFS - 2 Years)") %>%
  kable_classic("striped", full_width = F)
Univariate Cox Regression (PFS - 2 Years)
Characteristic Hazard Ratio 95% Confidence Interval (Low) 95% Confidence Interval (High) P Value
International Prognosis Index 1.86 1.27 2.71 1.36e-03
Cell of Origin (Germinal Center) 0.72 0.49 1.05 9.12e-02
Double Expressor Lymphoma 2.21 1.51 3.24 4.96e-05
FISH-MYC Rearrangment 1.70 0.63 4.61 2.99e-01
TP53 Mutation 1.66 1.08 2.53 2.00e-02
clean_up(as.data.frame(t(sapply(features, perform_cox_os5)))) %>%
  kbl(caption = "Univariate Cox Regression (OS - 5 Years)") %>%
  kable_classic("striped", full_width = F)
Univariate Cox Regression (OS - 5 Years)
Characteristic Hazard Ratio 95% Confidence Interval (Low) 95% Confidence Interval (High) P Value
International Prognosis Index 2.14 1.30 3.52 2.69e-03
Cell of Origin (Germinal Center) 0.61 0.36 1.02 6.14e-02
Double Expressor Lymphoma 2.40 1.46 3.96 5.90e-04
FISH-MYC Rearrangment 1.43 0.35 5.87 6.16e-01
TP53 Mutation 1.85 1.07 3.20 2.78e-02
clean_up(as.data.frame(t(sapply(features, perform_cox_pfs5)))) %>%
  kbl(caption = "Univariate Cox Regression (PFS - 5 Years)") %>%
  kable_classic("striped", full_width = F)
Univariate Cox Regression (PFS - 5 Years)
Characteristic Hazard Ratio 95% Confidence Interval (Low) 95% Confidence Interval (High) P Value
International Prognosis Index 1.72 1.21 2.45 2.42e-03
Cell of Origin (Germinal Center) 0.63 0.44 0.91 1.24e-02
Double Expressor Lymphoma 2.08 1.45 2.96 5.91e-05
FISH-MYC Rearrangment 1.42 0.52 3.84 4.91e-01
TP53 Mutation 1.45 0.97 2.19 7.32e-02
#Multivariate Cox Regression
pfs_multi <- coxph(Surv(pfs_2yr, pfs_status_2yr) ~ ipi3 + DEL + `TP53...13`, data = EZH2)
summary(pfs_multi)
## Call:
## coxph(formula = Surv(pfs_2yr, pfs_status_2yr) ~ ipi3 + DEL + 
##     TP53...13, data = EZH2)
## 
##   n= 412, number of events= 109 
## 
##             coef exp(coef) se(coef)     z Pr(>|z|)    
## ipi3      0.4864    1.6264   0.1961 2.480 0.013132 *  
## DEL       0.7176    2.0494   0.1977 3.630 0.000283 ***
## TP53...13 0.4414    1.5549   0.2179 2.026 0.042757 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## ipi3          1.626     0.6148     1.107     2.389
## DEL           2.049     0.4879     1.391     3.019
## TP53...13     1.555     0.6431     1.015     2.383
## 
## Concordance= 0.636  (se = 0.025 )
## Likelihood ratio test= 26.04  on 3 df,   p=9e-06
## Wald test            = 27.92  on 3 df,   p=4e-06
## Score (logrank) test = 29.18  on 3 df,   p=2e-06
os_multi <- coxph(Surv(os_2yr, os_status_2yr) ~ ipi3 + DEL + `TP53...13`, data = EZH2)
summary(os_multi)
## Call:
## coxph(formula = Surv(os_2yr, os_status_2yr) ~ ipi3 + DEL + TP53...13, 
##     data = EZH2)
## 
##   n= 412, number of events= 49 
## 
##             coef exp(coef) se(coef)     z Pr(>|z|)  
## ipi3      0.6612    1.9371   0.2909 2.273   0.0230 *
## DEL       0.6920    1.9977   0.2928 2.363   0.0181 *
## TP53...13 0.6474    1.9106   0.3062 2.114   0.0345 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## ipi3          1.937     0.5162     1.095     3.426
## DEL           1.998     0.5006     1.125     3.546
## TP53...13     1.911     0.5234     1.048     3.482
## 
## Concordance= 0.675  (se = 0.035 )
## Likelihood ratio test= 17.53  on 3 df,   p=6e-04
## Wald test            = 18.79  on 3 df,   p=3e-04
## Score (logrank) test = 19.92  on 3 df,   p=2e-04
#Survival Fits
OS_fit <- survfit(Surv(survtimeo, as.numeric(survstatuso)) ~ newipi012, data = EZH2)
PFS_fit <- survfit(Surv(survtimep, as.numeric(survstatusp)) ~ newipi012, data = EZH2)

ggsurvplot(PFS_fit, 
           data = EZH2,
           legend = "bottom",
           legend.title = "IPI",
           legend.labs = c("0", "1", "2"),
           pval = TRUE,
           ggtheme = theme_bw()) +
  xlab("Time (Month)") +
  ylab("PFS Probability")

ggsurvplot(OS_fit, 
           data = EZH2,
           legend = "bottom",
           legend.title = "IPI",
           legend.labs = c("0", "1", "2"),
           pval = TRUE,
           ggtheme = theme_bw()) +
  xlab("Time (Month)") +
  ylab("OS Probability")