#Loading libraries

library(survival)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(readxl)
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma

#Loading data

data = read_excel('~/Downloads/Meta of KM (PFA)/Reconstructed/Reconstructed data.xlsx') %>% mutate_if(is.character, ~as.factor(as.character(.)))

#Building a Survival Object

surv <- Surv(time = data$time, event = data$status)

#Median survival time

fita <- survfit(surv ~ 1)
print(fita)
## Call: survfit(formula = surv ~ 1)
## 
##        n events median 0.95LCL 0.95UCL
## [1,] 929    344   1559     900      NA
fitc <- survfit(surv ~ data$treat)
print(fitc)
## Call: survfit(formula = surv ~ data$treat)
## 
##                              n events median 0.95LCL 0.95UCL
## data$treat=Endo            582    234    871     744    1559
## data$treat=Endo-epicardial 347    110     NA      NA      NA

#Testing the difference between two survival curves

survdiff(surv ~ data$treat)
## Call:
## survdiff(formula = surv ~ data$treat)
## 
##                              N Observed Expected (O-E)^2/E (O-E)^2/V
## data$treat=Endo            582      234      205      3.96      9.95
## data$treat=Endo-epicardial 347      110      139      5.87      9.95
## 
##  Chisq= 9.9  on 1 degrees of freedom, p= 0.002

jpeg(file=“~/Downloads/Meta of KM (PFA)/Results/Survival plot all.jpeg”) dev.off()

#K-M Plot with a number at risk table

#jpeg(file="~/Downloads/Meta of KM (PFA)/Results/Survival plot all.jpeg")

ggsurvplot(fitc, data = data, size = 1,
   palette = c("purple", "darkgoldenrod"), 
   conf.int = TRUE, 
   pval = TRUE, 
   risk.table = TRUE, 
   risk.table.height = 0.25, 
   risk.table.y.text.col = T, 
   xlab = "Time in Days", 
   break.time.by = 500,
   legend.labs = c("Endo", "Endo-Epicardial"), 
   ggtheme = theme_bw() 
   )

#dev.off()

ggsurvplot(fita, data = data,size = 1,
   conf.int = TRUE, 
   risk.table = TRUE, 
   risk.table.height = 0.25,
   xlab = "Time in Days",
   ggtheme = theme_bw() ,
   break.time.by = 500
)

#Cumulative hazard function

fit33 <- survfit(Surv(time, status) ~ treat, data = data)
ggsurvplot(fit33, data,
          conf.int = TRUE,
          risk.table.col = "strata", 
          ggtheme = theme_bw(), 
          palette = c("#E7B800", "#2E9FDF"),
          fun = "cumhaz")

#Conditional survival over all data

# plot the conditional survival curves at baseline, and for those who have survived 0:10 years
library(condsurv)
data$os_yrs = data$time/(30*12)
myfit <- survfit(Surv(os_yrs, status) ~ 1, data = data)

cond_times <- seq(0, 10, 1)

gg_conditional_surv(
  basekm = myfit, 
  at = cond_times,
  xlab = "Years",
  main = "Conditional survival: all"
  ) +
  labs(color = "Conditional Time")

#conditional survival estimate of surviving to a variety of different time points given that the subject has already survived for 0 years
prob_times <- seq(0, 10, 1)

purrr::map_df(
  prob_times, 
  ~conditional_surv_est(
    basekm = myfit, 
    t1 = 0, 
    t2 = .x) 
  ) %>% 
  mutate(years = prob_times) %>% 
  select(years, everything()) %>% 
  knitr::kable()
years cs_est cs_lci cs_uci
0 1.00 1.00 1.00
1 0.67 0.63 0.70
2 0.60 0.56 0.63
3 0.53 0.49 0.58
4 0.50 0.46 0.55
5 0.48 0.43 0.54
6 0.48 0.43 0.54
7 0.48 0.43 0.54
8 0.48 0.43 0.54
9 0.48 0.43 0.54
10 0.48 0.43 0.54

#Conditional survival in endo alone

datae <- as_tibble(data)
datae = datae  %>% filter(treat == "Endo")

# plot the conditional survival curves at baseline, and for those who have survived 0:10 years

datae$os_yrs = datae$time/(30*12)
myfit <- survfit(Surv(os_yrs, status) ~ 1, data = datae)

cond_times <- seq(0, 10, 1)

gg_conditional_surv(
  basekm = myfit, 
  at = cond_times,
  xlab = "Years",
  main = "Conditional survival: Endo"
  ) +
  labs(color = "Conditional Time")

#conditional survival estimate of surviving to a variety of different time points given that the subject has already survived for 0 years
prob_times <- seq(0, 10, 1)

purrr::map_df(
  prob_times, 
  ~conditional_surv_est(
    basekm = myfit, 
    t1 = 0, 
    t2 = .x) 
  ) %>% 
  mutate(years = prob_times) %>% 
  select(years, everything()) %>% 
  knitr::kable()
years cs_est cs_lci cs_uci
0 1.00 1.00 1.00
1 0.65 0.61 0.69
2 0.55 0.50 0.60
3 0.45 0.39 0.51
4 0.43 0.36 0.50
5 0.41 0.34 0.48
6 0.41 0.34 0.48
7 0.41 0.34 0.48
8 0.41 0.34 0.48
9 0.41 0.34 0.48
10 0.41 0.34 0.48

#Conditional survival in endo/epi group

datai <- as_tibble(data)
datai = datai  %>% filter(treat == "Endo-epicardial")
# plot the conditional survival curves at baseline, and for those who have survived 0:10 years

datai$os_yrs = datai$time/(30*12)
myfit <- survfit(Surv(os_yrs, status) ~ 1, data = datai)

cond_times <- seq(0, 10, 1)

gg_conditional_surv(
  basekm = myfit, 
  at = cond_times,
  xlab = "Years",
  main = "Conditional survival: Endo-epicardial"
  ) +
  labs(color = "Conditional Time")

#conditional survival estimate of surviving to a variety of different time points given that the subject has already survived for 0 years
prob_times <- seq(0, 10, 1)

purrr::map_df(
  prob_times, 
  ~conditional_surv_est(
    basekm = myfit, 
    t1 = 0, 
    t2 = .x) 
  ) %>% 
  mutate(years = prob_times) %>% 
  select(years, everything()) %>% 
  knitr::kable()
years cs_est cs_lci cs_uci
0 1.00 1.00 1.00
1 0.70 0.65 0.75
2 0.67 0.61 0.72
3 0.64 0.58 0.70
4 0.60 0.53 0.67
5 0.58 0.50 0.65
6 0.58 0.50 0.65
7 0.58 0.50 0.65
8 0.58 0.50 0.65
9 0.58 0.50 0.65
10 0.58 0.50 0.65

#Cox Regression Model assumption

mv_fit <- coxph(Surv(time, status) ~ treat, data = data)
cz <- cox.zph(mv_fit)
print(cz)
##        chisq df    p
## treat  0.525  1 0.47
## GLOBAL 0.525  1 0.47
plot(cz)
ggcoxzph(cz)

summary(mv_fit)
## Call:
## coxph(formula = Surv(time, status) ~ treat, data = data)
## 
##   n= 929, number of events= 344 
## 
##                         coef exp(coef) se(coef)     z Pr(>|z|)   
## treatEndo-epicardial -0.3649    0.6943   0.1162 -3.14  0.00169 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                      exp(coef) exp(-coef) lower .95 upper .95
## treatEndo-epicardial    0.6943       1.44    0.5529    0.8719
## 
## Concordance= 0.535  (se = 0.013 )
## Likelihood ratio test= 10.25  on 1 df,   p=0.001
## Wald test            = 9.86  on 1 df,   p=0.002
## Score (logrank) test = 9.97  on 1 df,   p=0.002
library(broom)
tidy(mv_fit, exponentiate = TRUE)
## # A tibble: 1 × 5
##   term                 estimate std.error statistic p.value
##   <chr>                   <dbl>     <dbl>     <dbl>   <dbl>
## 1 treatEndo-epicardial    0.694     0.116     -3.14 0.00169
Publish::publish(mv_fit)
##  Variable           Units HazardRatio       CI.95   p-value 
##     treat            Endo         Ref                       
##           Endo-epicardial        0.69 [0.55;0.87]   0.00169
anova(mv_fit)
## Analysis of Deviance Table
##  Cox model: response is Surv(time, status)
## Terms added sequentially (first to last)
## 
##        loglik Chisq Df Pr(>|Chi|)   
## NULL  -2190.5                       
## treat -2185.4 10.25  1   0.001367 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Fitting a Cox Model using cph from the rms package

library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
units(data$time) <- "Days"
d <- datadist(data)
options(datadist = "d")

hemsurv <- Surv(time = data$time, event = data$status)

model_hem <- cph(hemsurv ~ treat, data = data, 
                 x = TRUE, y = TRUE, surv = TRUE)
plot(summary(model_hem),main = "")
title(main = "Hazard Ratio",line = 1.1, adj =0.65)

#Treatment’s effect on log relative hazard

ggplot(Predict(model_hem, treat))

#nomogram

survx <- Survival(model_hem)
plot(nomogram(model_hem, fun=list(function(x) survx(365, x),
                                  function(x) survx(365*2, x),
                                  function(x) survx(365*3, x),
                                  function(x) survx(365*10, x)),
                                  funlabel=c("One-Year Pr(Survival)", 
                                             "Two-Year Pr(Survival)",
                                             "Three-Year Pr(Survival)",
                                             "Ten-Years Pr(Survival)")))

#Landmark approach ### Step 1 Select landmark time ### Step 2 Subset population for those followed at least until landmark time ###Step 3 Calculate follow-up time from landmark and apply traditional methods.

lm_dat <- 
  data %>% 
  filter(time >= 1000) 
lm_dat <- 
  lm_dat %>% 
  mutate(
    lm_T1 = time - 1000
    )

We exclude 800 patients who were not followed until the landmark time of 90 days

surv <- Surv(time = lm_dat$lm_T1, event = lm_dat$status)
fit <- survfit(surv ~ lm_dat$treat)
  ggsurvplot(fit, data = lm_dat, size = 1,
      palette = c("purple", "darkgoldenrod"), 
      conf.int = TRUE, 
      pval = TRUE, 
      risk.table = TRUE, 
      risk.table.height = 0.25, 
      risk.table.y.text.col = T, 
      xlab = "Days from 1000-day landmark", 
      break.time.by = 500,
      legend.labs = c("Endo", "Endo-Epicardial"), 
      ggtheme = theme_bw() )

library(gtsummary)
library(survival)
coxph(
  Surv(time, status) ~ treat, 
  subset = time >= 1000, 
  data = data
  ) %>% 
  tbl_regression(exp = TRUE)

#Sensitivity Analysis ( For each subtype of the diseases)

groups = levels(data$group)
group_name <- list()
HR <- list()  
seHR <- list() 
P_value <- list()
for(i in groups ) {                                           
  datag <- data  %>% filter(group == i) 
  fit <- coxph(Surv(time, status) ~ treat, data = datag)          
  sumfit <-summary(fit)  
  sumfit <- as.data.frame(sumfit$coefficients)
  group_name <- append(group_name, i)
  HR <-  append(HR,c(sumfit$`exp(coef)`))
  seHR <- append(seHR,c(sumfit$`se(coef)`)) 
  P_value <- append(P_value,c(sumfit$`Pr(>|z|)`)) 
}

hazardratios = data.frame(unlist(group_name),unlist(HR), unlist(seHR), unlist(P_value)) %>% clean_names()
hazardratios <- as_tibble(hazardratios)
hazardratios <- hazardratios %>% rename( Disease = unlist_group_name,
                         'HR of the combined therapy' = unlist_hr ,
                         'standard error' = unlist_se_hr,
                         P.value = unlist_p_value )

hazardratios
## # A tibble: 3 × 4
##   Disease `HR of the combined therapy` `standard error`  P.value
##   <chr>                          <dbl>            <dbl>    <dbl>
## 1 ARVC                           0.541            0.178 0.000550
## 2 ICM                            0.440            0.366 0.0250  
## 3 NICM                           0.835            0.187 0.335
for(i in groups ) {                                           
   datag <- data  %>% filter(group == i) 
   surv <- Surv(time = datag$time, event = datag$status)

   fit <- survfit(surv ~ datag$treat)
  
   
   plot <- ggsurvplot(fit, data = datag, size = 1,
      palette = c("purple", "darkgoldenrod"), 
      conf.int = TRUE, 
      pval = TRUE, 
      risk.table = TRUE, 
      risk.table.height = 0.25, 
      risk.table.y.text.col = T, 
      xlab = "Time in Days", 
      break.time.by = 500,
      legend.title = i ,
      legend.labs = c("Endo", "Endo-Epicardial"), 
      ggtheme = theme_bw() )
   print(plot)
  
}

#Heterogniety

studies = levels(data$study)
study_names <- list()
HR <- list()  
seHR <- list() 
for(i in studies ) {                                           
  datag <- data  %>% filter(study == i) 
  fit <- coxph(Surv(time, status) ~ treat, data = datag)                        
  sumfit <- summary(fit)  
  sumfit <- as.data.frame(sumfit$coefficients)
  study_names <- append(study_names, i)
  HR <-  append(HR,c(sumfit$`exp(coef)`))
  seHR <- append(seHR,c(sumfit$`se(coef)`))
}

hazardperstudy = data.frame(unlist(study_names),unlist(HR), unlist(seHR)) %>% clean_names()
library(meta)
## Loading 'meta' package (version 5.2-0).
## Type 'help(meta)' for a brief overview.
## Readers of 'Meta-Analysis with R (Use R!)' should install
## older version of 'meta' package: https://tinyurl.com/dt4y5drs
library(metafor)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: metadat
## 
## Loading the 'metafor' package (version 3.4-0). For an
## introduction to the package please type: help(metafor)
## 
## Attaching package: 'metafor'
## The following object is masked from 'package:rms':
## 
##     vif
madata <- metagen(TE = unlist_hr,
            seTE = unlist_se_hr,
            data = hazardperstudy,
            studlab = paste(unlist_study_names),
            fixed = TRUE,
            random = TRUE,
            method.tau = "DL",
            hakn = FALSE,
            prediction = TRUE)
## Warning in metagen(TE = unlist_hr, seTE = unlist_se_hr, data = hazardperstudy, :
## Zero values in seTE replaced by NAs.
forest(madata,fontsize = 8)