library(readr)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggsignif)
library(readxl)
library(survminer)
library(survival)
library(table1)
library(kableExtra)
EZH2 <- read_excel("/Users/vni/Downloads/Research/Clinical Research/Dr. Landsburg/Vincent EZH2.xlsx") %>%
rename(FISH = 'FISH-MYC')
#KM Plots (PFS and OS)
#Generating Mutation Characterization
EZH2_1 <- EZH2 %>%
mutate(Mutation = case_when(
!is.na(`EZH2 var`) & is.na(`TP53 var`) ~ "EZH2 Only",
is.na(`EZH2 var`) & !is.na(`TP53 var`) ~ "TP53 Only",
!is.na(`EZH2 var`) & !is.na(`TP53 var`) ~ "EZH2/TP53 Co-Mutation",
TRUE ~ "Neither"),
Mutation = factor(Mutation, levels = c("EZH2 Only", "TP53 Only", "EZH2/TP53 Co-Mutation", "Neither"))
)
#Reorder Mutation
new_order <- c("EZH2 Only", "TP53 Only", "EZH2/TP53 Co-Mutation", "Neither")
EZH2_1$Mutation <- factor(EZH2_1$Mutation, levels = new_order)
#Survival Fits
PFS_fit <- survfit(Surv(PFS, as.numeric(death)) ~ Mutation, data = EZH2_1)
OS_fit <- survfit(Surv(OS, as.numeric(death)) ~ Mutation, data = EZH2_1)
# Log-rank test for pairwise comparisons
PFS_p_values <- pairwise_survdiff(Surv(PFS, as.numeric(death)) ~ Mutation, data = EZH2_1, p.adjust.method = "holm")
OS_p_values <- pairwise_survdiff(Surv(OS, as.numeric(death)) ~ Mutation, data = EZH2_1, p.adjust.method = "holm")
# Print the p-values for pairwise comparisons
print(PFS_p_values)
##
## Pairwise comparisons using Log-Rank test
##
## data: EZH2_1 and Mutation
##
## EZH2 Only TP53 Only EZH2/TP53 Co-Mutation
## TP53 Only 0.34 - -
## EZH2/TP53 Co-Mutation 1.00 1.00 -
## Neither 1.00 7.6e-06 1.00
##
## P value adjustment method: holm
print(OS_p_values)
##
## Pairwise comparisons using Log-Rank test
##
## data: EZH2_1 and Mutation
##
## EZH2 Only TP53 Only EZH2/TP53 Co-Mutation
## TP53 Only 0.33 - -
## EZH2/TP53 Co-Mutation 1.00 1.00 -
## Neither 1.00 1.3e-05 1.00
##
## P value adjustment method: holm
#Plot
ggsurvplot(PFS_fit,
data = EZH2_1,
legend = "bottom",
legend.title = "Mutation",
legend.labs = c("EZH2 Only", "TP53 Only", "EZH2/TP53 Co-Mutation", "Neither"),
pval = TRUE,
ggtheme = theme_bw()) +
xlab("Time (Month)") +
ylab("PFS Probability")

ggsurvplot(OS_fit, data = EZH2_1,
legend = "bottom",
legend.title = "Mutation",
legend.labs = c("EZH2 Only", "TP53 Only", "EZH2/TP53 Co-Mutation", "Neither"),
pval = TRUE,
ggtheme = theme_bw()) +
xlab("Time (Month)") +
ylab("OS Probability")

#KM Plots (MYC-FISH)
#Generating MYC and EZH2 interaction Characterization
EZH2_MYC <- EZH2 %>%
mutate(EZH2_MYC = case_when(
!is.na(`EZH2 var`) & FISH == 1 ~ "EZH2 + MYC-FISH",
!is.na(`EZH2 var`) & FISH == 0 ~ "EZH2 Only",
is.na(`EZH2 var`) & FISH == 1 ~ "MYC-FISH Only",
TRUE ~ "Neither"),
EZH2_MYC = factor(EZH2_MYC, levels = c("EZH2 + MYC-FISH", "EZH2 Only", "MYC-FISH Only", "Neither"))
)
#Reorder Mutation
new_order_MYC <- c("EZH2 + MYC-FISH", "EZH2 Only", "MYC-FISH Only", "Neither")
EZH2_MYC$EZH2_MYC <- factor(EZH2_MYC$EZH2_MYC, levels = new_order_MYC)
#Survival Fits
MYC_PFS_fit <- survfit(Surv(PFS, as.numeric(death)) ~ EZH2_MYC, data = EZH2_MYC)
MYC_OS_fit <- survfit(Surv(OS,as.numeric(death)) ~ EZH2_MYC, data = EZH2_MYC)
# Log-rank test for pairwise comparisons
MYC_PFS_p_values <- pairwise_survdiff(Surv(PFS, as.numeric(death)) ~ EZH2_MYC, data = EZH2_MYC, p.adjust.method = "holm")
MYC_OS_p_values <- pairwise_survdiff(Surv(OS, as.numeric(death)) ~ EZH2_MYC, data = EZH2_MYC, p.adjust.method = "holm")
# Print the p-values for pairwise comparisons
print(MYC_PFS_p_values)
##
## Pairwise comparisons using Log-Rank test
##
## data: EZH2_MYC and EZH2_MYC
##
## EZH2 + MYC-FISH EZH2 Only MYC-FISH Only
## EZH2 Only 0.913 - -
## MYC-FISH Only 0.913 0.004 -
## Neither 0.913 0.913 7.9e-11
##
## P value adjustment method: holm
print(MYC_OS_p_values)
##
## Pairwise comparisons using Log-Rank test
##
## data: EZH2_MYC and EZH2_MYC
##
## EZH2 + MYC-FISH EZH2 Only MYC-FISH Only
## EZH2 Only 0.8453 - -
## MYC-FISH Only 0.9109 0.0042 -
## Neither 0.9109 0.9109 2.3e-10
##
## P value adjustment method: holm
#Plot
ggsurvplot(MYC_PFS_fit,
data = EZH2_MYC,
legend = "bottom",
legend.title = "Mutation",
legend.labs = c("EZH2 + MYC-FISH", "EZH2 Only", "MYC-FISH Only", "Neither"),
pval = TRUE,
ggtheme = theme_bw()) +
xlab("Time (Month)") +
ylab("PFS Probability")

ggsurvplot(MYC_OS_fit,
data = EZH2_MYC,
legend = "bottom",
legend.title = "Mutation",
legend.labs = c("EZH2 + MYC-FISH", "EZH2 Only", "MYC-FISH Only", "Neither"),
pval = TRUE,
ggtheme = theme_bw()) +
xlab("Time (Month)") +
ylab("OS Probability")

var_list <- list("DEL", "ipi3", "COO", "FISH", "DHL", "TP53")
x_square_results <- matrix()
p_values <- matrix()
for(i in var_list) {
#Create contingency tables
con_name <- paste0(i, "_contigency")
contingency <- table(EZH2$EZH2, EZH2[[i]])
assign(con_name, contingency)
#Chi-Square Test
chisq_name <- paste0(i, "_chisq")
chi_square_result <- chisq.test(contingency)
assign(chisq_name, chi_square_result)
# Print the chi-square test result
cat("Chi-square test for", con_name, ":\n")
print(chi_square_result)
cat("\n")
x_square_results[i] <- chi_square_result[["statistic"]][["X-squared"]]
p_values[i] <- chi_square_result[["p.value"]]
}
## Chi-square test for DEL_contigency :
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency
## X-squared = 0.16205, df = 1, p-value = 0.6873
##
##
## Chi-square test for ipi3_contigency :
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency
## X-squared = 5.2863, df = 1, p-value = 0.02149
##
##
## Chi-square test for COO_contigency :
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency
## X-squared = 22.103, df = 1, p-value = 2.584e-06
##
##
## Chi-square test for FISH_contigency :
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency
## X-squared = 5.566, df = 1, p-value = 0.01831
##
##
## Chi-square test for DHL_contigency :
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency
## X-squared = 28.119, df = 1, p-value = 1.141e-07
##
##
## Chi-square test for TP53_contigency :
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency
## X-squared = 0.075904, df = 1, p-value = 0.7829
#Cleaning up the results
x_square_full <- cbind(as.data.frame(x_square_results), as.data.frame(p_values)) %>%
rename(`χ2` = x_square_results,
`P Value` = p_values) %>%
filter(!is.na(`χ2`)) %>%
mutate(`χ2` = round(as.numeric(`χ2`), 2),
`P Value` = format(as.numeric(`P Value`), scientific = TRUE, digits = 2))
#Rename rownames
new_rownames <- c("Double Expressor Lymphoma", "International Prognotic Index Score", "Cell of Origin (Germinal Center)", "FISH-MYC Rearrangement", "Double Hit Lymphoma", "TP53 Mutation")
rownames(x_square_full) <- new_rownames
#Table
x_square_full %>%
kbl(caption = "Association of Incidence") %>%
kable_classic("striped", full_width = F)
Association of Incidence
|
|
χ2
|
P Value
|
|
Double Expressor Lymphoma
|
0.16
|
6.9e-01
|
|
International Prognotic Index Score
|
5.29
|
2.1e-02
|
|
Cell of Origin (Germinal Center)
|
22.10
|
2.6e-06
|
|
FISH-MYC Rearrangement
|
5.57
|
1.8e-02
|
|
Double Hit Lymphoma
|
28.12
|
1.1e-07
|
|
TP53 Mutation
|
0.08
|
7.8e-01
|
#Convert EZH2_mut to a factor for modeling
EZH2$EZH2 <- factor(EZH2$EZH2, levels = c(0, 1), labels = c("No Mutation", "Mutation"))
#Stratified Log Rank Formula
strat_log <- function(outcome, var, data) {
formula_str <- paste("Surv(", outcome, ", as.numeric(death)) ~ EZH2 + strata(", var, ")", sep = "")
formula <- as.formula(formula_str)
survdiff(formula, data = data)
}
OS_strat_log_results <- matrix()
OS_strat_p_value <- matrix()
PFS_strat_log_results <- matrix()
PFS_strat_p_value <- matrix()
#Loop Iterations for Stratified Log Rank
for(i in var_list) {
var <- i
OS_strat <- strat_log(outcome = "OS", var, data = EZH2_1)
PFS_strat <- strat_log(outcome = "PFS", var, data = EZH2_1)
print(OS_strat)
print(PFS_strat)
OS_strat_log_results[i] <- OS_strat[["chisq"]]
OS_strat_p_value[i] <- OS_strat[["pvalue"]]
PFS_strat_log_results[i] <- PFS_strat[["chisq"]]
PFS_strat_p_value[i] <- PFS_strat[["pvalue"]]
}
## Call:
## survdiff(formula = formula, data = data)
##
## n=874, 49 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 833 142 141.43 0.00231 0.0522
## EZH2=1 41 6 6.57 0.04976 0.0522
##
## Chisq= 0.1 on 1 degrees of freedom, p= 0.8
## Call:
## survdiff(formula = formula, data = data)
##
## n=874, 49 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 833 142 141.43 0.00231 0.0521
## EZH2=1 41 6 6.57 0.04968 0.0521
##
## Chisq= 0.1 on 1 degrees of freedom, p= 0.8
## Call:
## survdiff(formula = formula, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 876 155 151.6 0.0763 1.2
## EZH2=1 47 7 10.4 1.1118 1.2
##
## Chisq= 1.2 on 1 degrees of freedom, p= 0.3
## Call:
## survdiff(formula = formula, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 876 155 151.9 0.0648 1.05
## EZH2=1 47 7 10.1 0.9705 1.05
##
## Chisq= 1 on 1 degrees of freedom, p= 0.3
## Call:
## survdiff(formula = formula, data = data)
##
## n=488, 435 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 452 83 80.97 0.051 0.78
## EZH2=1 36 4 6.03 0.685 0.78
##
## Chisq= 0.8 on 1 degrees of freedom, p= 0.4
## Call:
## survdiff(formula = formula, data = data)
##
## n=488, 435 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 452 83 81.13 0.0431 0.677
## EZH2=1 36 4 5.87 0.5952 0.677
##
## Chisq= 0.7 on 1 degrees of freedom, p= 0.4
## Call:
## survdiff(formula = formula, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 876 155 152.31 0.0477 0.815
## EZH2=1 47 7 9.69 0.7489 0.815
##
## Chisq= 0.8 on 1 degrees of freedom, p= 0.4
## Call:
## survdiff(formula = formula, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 876 155 152.47 0.0421 0.728
## EZH2=1 47 7 9.53 0.6737 0.728
##
## Chisq= 0.7 on 1 degrees of freedom, p= 0.4
## Call:
## survdiff(formula = formula, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 876 155 151.6 0.0752 1.29
## EZH2=1 47 7 10.4 1.0994 1.29
##
## Chisq= 1.3 on 1 degrees of freedom, p= 0.3
## Call:
## survdiff(formula = formula, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 876 155 151.6 0.0783 1.32
## EZH2=1 47 7 10.4 1.1358 1.32
##
## Chisq= 1.3 on 1 degrees of freedom, p= 0.3
## Call:
## survdiff(formula = formula, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 876 155 154 0.00647 0.131
## EZH2=1 47 7 8 0.12461 0.131
##
## Chisq= 0.1 on 1 degrees of freedom, p= 0.7
## Call:
## survdiff(formula = formula, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## EZH2=0 876 155 154.14 0.00475 0.0981
## EZH2=1 47 7 7.86 0.09313 0.0981
##
## Chisq= 0.1 on 1 degrees of freedom, p= 0.8
#Cleaning up Stratified Log Rank Results
OS_strat_log_results <- cbind(as.data.frame(OS_strat_log_results), as.data.frame(OS_strat_p_value)) %>%
filter(!is.na(OS_strat_log_results)) %>%
mutate(`χ2` = round(as.numeric(OS_strat_log_results), 2),
`P Value` = format(as.numeric(OS_strat_p_value), scientific = TRUE, digits = 2))
OS_strat_log_results <- OS_strat_log_results[, c(3, 4)]
PFS_strat_log_results <- cbind(as.data.frame(PFS_strat_log_results), as.data.frame(PFS_strat_p_value)) %>%
filter(!is.na(PFS_strat_log_results)) %>%
mutate(`χ2` = round(as.numeric(PFS_strat_log_results), 2),
`P Value` = format(as.numeric(PFS_strat_p_value), scientific = TRUE, digits = 2))
PFS_strat_log_results <- PFS_strat_log_results[, c(3, 4)]
#Rename rownames
rownames(OS_strat_log_results) <- new_rownames
rownames(PFS_strat_log_results) <- new_rownames
#Table
OS_strat_log_results %>%
kbl(caption = "Stratified Log Rank (Overall Survival)") %>%
kable_classic("striped", full_width = F)
Stratified Log Rank (Overall Survival)
|
|
χ2
|
P Value
|
|
Double Expressor Lymphoma
|
0.05
|
8.2e-01
|
|
International Prognotic Index Score
|
1.20
|
2.7e-01
|
|
Cell of Origin (Germinal Center)
|
0.78
|
3.8e-01
|
|
FISH-MYC Rearrangement
|
0.82
|
3.7e-01
|
|
Double Hit Lymphoma
|
1.29
|
2.6e-01
|
|
TP53 Mutation
|
0.13
|
7.2e-01
|
PFS_strat_log_results %>%
kbl(caption = "Stratified Log Rank (Progression-Free Survival)") %>%
kable_classic("striped", full_width = F)
Stratified Log Rank (Progression-Free Survival)
|
|
χ2
|
P Value
|
|
Double Expressor Lymphoma
|
0.05
|
8.2e-01
|
|
International Prognotic Index Score
|
1.05
|
3.1e-01
|
|
Cell of Origin (Germinal Center)
|
0.68
|
4.1e-01
|
|
FISH-MYC Rearrangement
|
0.73
|
3.9e-01
|
|
Double Hit Lymphoma
|
1.32
|
2.5e-01
|
|
TP53 Mutation
|
0.10
|
7.5e-01
|