cleandat<- read_excel("/Users/afrozaparvin/Downloads/pone.0211513.s001.xlsx")
library(cutpointr) 

set.seed(100)
cp <- cutpointr(cleandat, Age, STAT_REC,
                method = maximize_metric, boot_cut = 200, summary_func = mean,
                metric = accuracy, silent = TRUE)
plot(cp)

plot_metric(cp)

cleandat <- cleandat %>%
  mutate(Agecat = cut(
    Age,
    breaks = c(0, 52, 67, 92),include.lowest = TRUE,
    labels = c("-52 years", "53-67 years", "68-")
  )) %>%
  mutate(Event =recode(STAT_REC, "1" = "Death", "0" = "Censored")) %>%
  mutate(Surgery_binary = as.numeric(recode(
    Surgery, "No surgery" = "0",
    "Surgery" = "1")
  ))
logplotdat<-subset(cleandat,srv_time_mon>0)
logplotdat<-logplotdat%>%
  mutate(cens=STAT_REC<1)%>%
  mutate(y=Surv(srv_time_mon,cens))
library(tableone)
table1::table1(
  ~ Age + Sex + Marital + Race + First_malignant_primary_tumor + Event +
    srv_time_mon | Surgery,
  data = cleandat,
  overall = "Total",
  rowlabelhead = "Characteristics",
  caption = "Table 1: Characteristics of the patients in the dataset"
) 
Table 1: Characteristics of the patients in the dataset
Characteristics No surgery
(N=230)
Surgery
(N=1669)
Total
(N=1899)
Age
Mean (SD) 53.0 (18.0) 48.7 (14.9) 49.2 (15.4)
Median [Min, Max] 53.5 [5.00, 92.0] 48.0 [0, 87.0] 49.0 [0, 92.0]
Sex
Female 88 (38.3%) 744 (44.6%) 832 (43.8%)
Male 142 (61.7%) 925 (55.4%) 1067 (56.2%)
Marital
Married 141 (61.3%) 1044 (62.6%) 1185 (62.4%)
Separated/divorced/widowed 37 (16.1%) 245 (14.7%) 282 (14.8%)
Single 52 (22.6%) 380 (22.8%) 432 (22.7%)
Race
Black 12 (5.2%) 75 (4.5%) 87 (4.6%)
Others 17 (7.4%) 142 (8.5%) 159 (8.4%)
White 201 (87.4%) 1452 (87.0%) 1653 (87.0%)
First_malignant_primary_tumor
No 27 (11.7%) 155 (9.3%) 182 (9.6%)
Yes 203 (88.3%) 1514 (90.7%) 1717 (90.4%)
Event
Censored 62 (27.0%) 789 (47.3%) 851 (44.8%)
Death 168 (73.0%) 880 (52.7%) 1048 (55.2%)
srv_time_mon
Mean (SD) 42.3 (58.5) 57.9 (58.0) 56.0 (58.3)
Median [Min, Max] 16.0 [0, 368] 38.0 [0, 354] 33.0 [0, 368]

fit_surgery<- survfit(Surv(srv_time_mon, STAT_REC) ~ Surgery, data = cleandat) 
fit_age<- survfit(Surv(srv_time_mon, STAT_REC) ~ Agecat, data = cleandat)
fit_sex<- survfit(Surv(srv_time_mon, STAT_REC) ~ Sex, data = cleandat) 
fit_marital<- survfit(Surv(srv_time_mon, STAT_REC) ~ Marital, data = cleandat) 
fit_malignancies<- survfit(Surv(srv_time_mon, STAT_REC) ~ First_malignant_primary_tumor, data = cleandat)
fit_race<- survfit(Surv(srv_time_mon, STAT_REC) ~ Race, data = cleandat) 

glmfit_multi<-glm(Surgery_binary~ Agecat+Sex+Race+Marital+First_malignant_primary_tumor, data=cleandat, family = "binomial")
tab_model(glmfit_multi)
  Surgery_binary
Predictors Odds Ratios CI p
(Intercept) 10.01 4.59 – 23.28 <0.001
Agecat53-67 years 0.65 0.47 – 0.90 0.010
Agecat [68-] 0.33 0.22 – 0.50 <0.001
Sex [Male] 0.74 0.56 – 0.99 0.044
Race [Others] 1.25 0.55 – 2.76 0.586
Race [White] 1.24 0.63 – 2.26 0.507
Marital
[Separated/divorced/widowed]
1.00 0.68 – 1.51 0.991
Marital [Single] 0.79 0.55 – 1.13 0.193
First_malignant_primary_tumor
[Yes]
1.02 0.64 – 1.57 0.944
Observations 1899
R2 Tjur 0.020
fitcox_surgery<-coxph(Surv(srv_time_mon, STAT_REC) ~ Surgery_binary, data=cleandat)
 library(dagitty)
dg <- dagitty('dag {
  bb="0,0,1,1"
  "Marital status" [pos="0.684,0.098"]
  "Multiple malignancies" [pos="0.510,0.493"]
  "Overall Survival Prognosis" [outcome,pos="0.596,0.297"]
  Age [pos="0.314,0.092"]
  Race [pos="0.497,0.104"]
  Sex [pos="0.309,0.488"]
  Surgery [exposure,pos="0.202,0.285"]
  "Marital status" -> "Overall Survival Prognosis"
  "Multiple malignancies" -> "Overall Survival Prognosis"
  Age -> "Overall Survival Prognosis"
  Age -> Surgery
  Race -> "Overall Survival Prognosis"
  Sex -> "Overall Survival Prognosis"
  Sex -> Surgery
  Surgery -> "Overall Survival Prognosis"
}')

plot(dg)

fitcox_adj <-
  coxph(
    Surv(srv_time_mon, STAT_REC) ~ Surgery_binary * First_malignant_primary_tumor +
      Agecat + Sex + Marital + Race,
    data = cleandat
  )
s<-step(fitcox_adj)

Start: AIC=14044.62 Surv(srv_time_mon, STAT_REC) ~ Surgery_binary * First_malignant_primary_tumor + Agecat + Sex + Marital + Race

                                           Df   AIC

Step: AIC=14042.62 Surv(srv_time_mon, STAT_REC) ~ Surgery_binary + First_malignant_primary_tumor + Agecat + Sex + Marital + Race

                            Df   AIC

Step: AIC=14042.28 Surv(srv_time_mon, STAT_REC) ~ Surgery_binary + First_malignant_primary_tumor + Agecat + Sex + Race

                            Df   AIC

14042 - Race 2 14044 - Sex 1 14049 - First_malignant_primary_tumor 1 14056 - Surgery_binary 1 14068 - Agecat 2 14350

fitcox_step1 <-
  coxph(
    Surv(srv_time_mon, STAT_REC) ~ Surgery_binary + First_malignant_primary_tumor + 
    Agecat + Sex + Marital + Race,
    data = cleandat
  )
fitcox_step2 <-
  coxph(
    Surv(srv_time_mon, STAT_REC) ~Surgery_binary + First_malignant_primary_tumor + 
    Agecat + Sex + Race,
    data = cleandat
  )
ANO<-anova(fitcox_adj,fitcox_step1, fitcox_step2)
print(ANO)
## Analysis of Deviance Table
##  Cox model: response is  Surv(srv_time_mon, STAT_REC)
##  Model 1: ~ Surgery_binary * First_malignant_primary_tumor + Agecat + Sex + Marital + Race
##  Model 2: ~ Surgery_binary + First_malignant_primary_tumor + Agecat + Sex + Marital + Race
##  Model 3: ~ Surgery_binary + First_malignant_primary_tumor + Agecat + Sex + Race
##    loglik  Chisq Df P(>|Chi|)
## 1 -7012.3                    
## 2 -7012.3 0.0006  1    0.9804
## 3 -7014.1 3.6563  2    0.1607
tab_model(fitcox_surgery,fitcox_step1, fitcox_adj,show.se=TRUE, show.aic = TRUE, show.loglik = TRUE)
  Surv(srv_time_mon,
STAT_REC)
Surv(srv_time_mon,
STAT_REC)
Surv(srv_time_mon,
STAT_REC)
Predictors Estimates std. Error CI p Estimates std. Error CI p Estimates std. Error CI p
Surgery_binary 0.54 0.05 0.46 – 0.64 <0.001 0.63 0.05 0.53 – 0.74 <0.001 0.62 0.15 0.39 – 0.99 0.044
First_malignant_primary_tumor
[Yes]
0.67 0.07 0.55 – 0.81 <0.001 0.67 0.15 0.42 – 1.05 0.079
Agecat53-67 years 2.07 0.15 1.79 – 2.39 <0.001 2.07 0.15 1.79 – 2.39 <0.001
Agecat [68-] 5.67 0.54 4.71 – 6.84 <0.001 5.67 0.54 4.71 – 6.84 <0.001
Sex [Male] 1.21 0.08 1.07 – 1.38 0.002 1.21 0.08 1.07 – 1.38 0.002
Marital
[Separated/divorced/widowed]
1.08 0.09 0.91 – 1.28 0.358 1.08 0.09 0.91 – 1.28 0.358
Marital [Single] 1.16 0.09 0.99 – 1.36 0.065 1.16 0.09 0.99 – 1.36 0.065
Race [Others] 0.74 0.13 0.52 – 1.05 0.088 0.74 0.13 0.52 – 1.05 0.088
Race [White] 0.72 0.10 0.54 – 0.95 0.022 0.72 0.10 0.54 – 0.95 0.022
Surgery_binary *
First_malignant_primary_tumor
[Yes]
1.01 0.25 0.61 – 1.65 0.980
Observations 1899 1899 1899
R2 Nagelkerke 0.024 0.197 0.197
AIC 14396.241 14042.621 14044.621
log-Likelihood -7197.121 -7012.311 -7012.310

###Figure 3: Plots of scaled Schoenfeld residuals against transformed time for each covariate in a model fit to the recidivism data. The solid line is a smoothing spline fit to the plot, with the broken lines representing a ± 2-standard-error band around the fit.

cox.zph(fitcox_adj)
                                          chisq df       p

Surgery_binary 13.802 1 0.00020 First_malignant_primary_tumor 1.485 1 0.22300 Agecat 16.463 2 0.00027 Sex 0.298 1 0.58510 Marital 0.694 2 0.70665 Race 3.933 2 0.13996 Surgery_binary:First_malignant_primary_tumor 5.872 1 0.01538 GLOBAL 35.569 10 1e-04

par(mfrow=c(4, 2)) 
plot(cox.zph(fitcox_adj))

fitplotcox_surgery<-survfit(Surv(srv_time_mon, STAT_REC)~Surgery_binary, data=logplotdat)
ggsurvplot(fitplotcox_surgery,fun="cloglog")