cleandat<- read_excel("/Users/afrozaparvin/Downloads/pone.0211513.s001.xlsx")
library(cutpointr)
set.seed(100)
cp <- cutpointr(cleandat, Age, STAT_REC,
method = maximize_metric, boot_cut = 200, summary_func = mean,
metric = accuracy, silent = TRUE)
plot(cp)
plot_metric(cp)
cleandat <- cleandat %>%
mutate(Agecat = cut(
Age,
breaks = c(0, 52, 67, 92),include.lowest = TRUE,
labels = c("-52 years", "53-67 years", "68-")
)) %>%
mutate(Event =recode(STAT_REC, "1" = "Death", "0" = "Censored")) %>%
mutate(Surgery_binary = as.numeric(recode(
Surgery, "No surgery" = "0",
"Surgery" = "1")
))
logplotdat<-subset(cleandat,srv_time_mon>0)
logplotdat<-logplotdat%>%
mutate(cens=STAT_REC<1)%>%
mutate(y=Surv(srv_time_mon,cens))
library(tableone)
table1::table1(
~ Age + Sex + Marital + Race + First_malignant_primary_tumor + Event +
srv_time_mon | Surgery,
data = cleandat,
overall = "Total",
rowlabelhead = "Characteristics",
caption = "Table 1: Characteristics of the patients in the dataset"
)
| Characteristics | No surgery (N=230) |
Surgery (N=1669) |
Total (N=1899) |
|---|---|---|---|
| Age | |||
| Mean (SD) | 53.0 (18.0) | 48.7 (14.9) | 49.2 (15.4) |
| Median [Min, Max] | 53.5 [5.00, 92.0] | 48.0 [0, 87.0] | 49.0 [0, 92.0] |
| Sex | |||
| Female | 88 (38.3%) | 744 (44.6%) | 832 (43.8%) |
| Male | 142 (61.7%) | 925 (55.4%) | 1067 (56.2%) |
| Marital | |||
| Married | 141 (61.3%) | 1044 (62.6%) | 1185 (62.4%) |
| Separated/divorced/widowed | 37 (16.1%) | 245 (14.7%) | 282 (14.8%) |
| Single | 52 (22.6%) | 380 (22.8%) | 432 (22.7%) |
| Race | |||
| Black | 12 (5.2%) | 75 (4.5%) | 87 (4.6%) |
| Others | 17 (7.4%) | 142 (8.5%) | 159 (8.4%) |
| White | 201 (87.4%) | 1452 (87.0%) | 1653 (87.0%) |
| First_malignant_primary_tumor | |||
| No | 27 (11.7%) | 155 (9.3%) | 182 (9.6%) |
| Yes | 203 (88.3%) | 1514 (90.7%) | 1717 (90.4%) |
| Event | |||
| Censored | 62 (27.0%) | 789 (47.3%) | 851 (44.8%) |
| Death | 168 (73.0%) | 880 (52.7%) | 1048 (55.2%) |
| srv_time_mon | |||
| Mean (SD) | 42.3 (58.5) | 57.9 (58.0) | 56.0 (58.3) |
| Median [Min, Max] | 16.0 [0, 368] | 38.0 [0, 354] | 33.0 [0, 368] |
fit_surgery<- survfit(Surv(srv_time_mon, STAT_REC) ~ Surgery, data = cleandat)
fit_age<- survfit(Surv(srv_time_mon, STAT_REC) ~ Agecat, data = cleandat)
fit_sex<- survfit(Surv(srv_time_mon, STAT_REC) ~ Sex, data = cleandat)
fit_marital<- survfit(Surv(srv_time_mon, STAT_REC) ~ Marital, data = cleandat)
fit_malignancies<- survfit(Surv(srv_time_mon, STAT_REC) ~ First_malignant_primary_tumor, data = cleandat)
fit_race<- survfit(Surv(srv_time_mon, STAT_REC) ~ Race, data = cleandat)
glmfit_multi<-glm(Surgery_binary~ Agecat+Sex+Race+Marital+First_malignant_primary_tumor, data=cleandat, family = "binomial")
tab_model(glmfit_multi)
| Â | Surgery_binary | ||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 10.01 | 4.59 – 23.28 | <0.001 |
| Agecat53-67 years | 0.65 | 0.47 – 0.90 | 0.010 |
| Agecat [68-] | 0.33 | 0.22 – 0.50 | <0.001 |
| Sex [Male] | 0.74 | 0.56 – 0.99 | 0.044 |
| Race [Others] | 1.25 | 0.55 – 2.76 | 0.586 |
| Race [White] | 1.24 | 0.63 – 2.26 | 0.507 |
|
Marital [Separated/divorced/widowed] |
1.00 | 0.68 – 1.51 | 0.991 |
| Marital [Single] | 0.79 | 0.55 – 1.13 | 0.193 |
|
First_malignant_primary_tumor [Yes] |
1.02 | 0.64 – 1.57 | 0.944 |
| Observations | 1899 | ||
| R2 Tjur | 0.020 | ||
fitcox_surgery<-coxph(Surv(srv_time_mon, STAT_REC) ~ Surgery_binary, data=cleandat)
library(dagitty)
dg <- dagitty('dag {
bb="0,0,1,1"
"Marital status" [pos="0.684,0.098"]
"Multiple malignancies" [pos="0.510,0.493"]
"Overall Survival Prognosis" [outcome,pos="0.596,0.297"]
Age [pos="0.314,0.092"]
Race [pos="0.497,0.104"]
Sex [pos="0.309,0.488"]
Surgery [exposure,pos="0.202,0.285"]
"Marital status" -> "Overall Survival Prognosis"
"Multiple malignancies" -> "Overall Survival Prognosis"
Age -> "Overall Survival Prognosis"
Age -> Surgery
Race -> "Overall Survival Prognosis"
Sex -> "Overall Survival Prognosis"
Sex -> Surgery
Surgery -> "Overall Survival Prognosis"
}')
plot(dg)
fitcox_adj <-
coxph(
Surv(srv_time_mon, STAT_REC) ~ Surgery_binary * First_malignant_primary_tumor +
Agecat + Sex + Marital + Race,
data = cleandat
)
s<-step(fitcox_adj)
Start: AIC=14044.62 Surv(srv_time_mon, STAT_REC) ~ Surgery_binary * First_malignant_primary_tumor + Agecat + Sex + Marital + Race
Df AIC
Step: AIC=14042.62 Surv(srv_time_mon, STAT_REC) ~ Surgery_binary + First_malignant_primary_tumor + Agecat + Sex + Marital + Race
Df AIC
Step: AIC=14042.28 Surv(srv_time_mon, STAT_REC) ~ Surgery_binary + First_malignant_primary_tumor + Agecat + Sex + Race
Df AIC
fitcox_step1 <-
coxph(
Surv(srv_time_mon, STAT_REC) ~ Surgery_binary + First_malignant_primary_tumor +
Agecat + Sex + Marital + Race,
data = cleandat
)
fitcox_step2 <-
coxph(
Surv(srv_time_mon, STAT_REC) ~Surgery_binary + First_malignant_primary_tumor +
Agecat + Sex + Race,
data = cleandat
)
ANO<-anova(fitcox_adj,fitcox_step1, fitcox_step2)
print(ANO)
## Analysis of Deviance Table
## Cox model: response is Surv(srv_time_mon, STAT_REC)
## Model 1: ~ Surgery_binary * First_malignant_primary_tumor + Agecat + Sex + Marital + Race
## Model 2: ~ Surgery_binary + First_malignant_primary_tumor + Agecat + Sex + Marital + Race
## Model 3: ~ Surgery_binary + First_malignant_primary_tumor + Agecat + Sex + Race
## loglik Chisq Df P(>|Chi|)
## 1 -7012.3
## 2 -7012.3 0.0006 1 0.9804
## 3 -7014.1 3.6563 2 0.1607
tab_model(fitcox_surgery,fitcox_step1, fitcox_adj,show.se=TRUE, show.aic = TRUE, show.loglik = TRUE)
| Â |
Surv(srv_time_mon, STAT_REC) |
Surv(srv_time_mon, STAT_REC) |
Surv(srv_time_mon, STAT_REC) |
|||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | CI | p | Estimates | std. Error | CI | p | Estimates | std. Error | CI | p |
| Surgery_binary | 0.54 | 0.05 | 0.46 – 0.64 | <0.001 | 0.63 | 0.05 | 0.53 – 0.74 | <0.001 | 0.62 | 0.15 | 0.39 – 0.99 | 0.044 |
|
First_malignant_primary_tumor [Yes] |
0.67 | 0.07 | 0.55 – 0.81 | <0.001 | 0.67 | 0.15 | 0.42 – 1.05 | 0.079 | ||||
| Agecat53-67 years | 2.07 | 0.15 | 1.79 – 2.39 | <0.001 | 2.07 | 0.15 | 1.79 – 2.39 | <0.001 | ||||
| Agecat [68-] | 5.67 | 0.54 | 4.71 – 6.84 | <0.001 | 5.67 | 0.54 | 4.71 – 6.84 | <0.001 | ||||
| Sex [Male] | 1.21 | 0.08 | 1.07 – 1.38 | 0.002 | 1.21 | 0.08 | 1.07 – 1.38 | 0.002 | ||||
|
Marital [Separated/divorced/widowed] |
1.08 | 0.09 | 0.91 – 1.28 | 0.358 | 1.08 | 0.09 | 0.91 – 1.28 | 0.358 | ||||
| Marital [Single] | 1.16 | 0.09 | 0.99 – 1.36 | 0.065 | 1.16 | 0.09 | 0.99 – 1.36 | 0.065 | ||||
| Race [Others] | 0.74 | 0.13 | 0.52 – 1.05 | 0.088 | 0.74 | 0.13 | 0.52 – 1.05 | 0.088 | ||||
| Race [White] | 0.72 | 0.10 | 0.54 – 0.95 | 0.022 | 0.72 | 0.10 | 0.54 – 0.95 | 0.022 | ||||
|
Surgery_binary * First_malignant_primary_tumor [Yes] |
1.01 | 0.25 | 0.61 – 1.65 | 0.980 | ||||||||
| Observations | 1899 | 1899 | 1899 | |||||||||
| R2 Nagelkerke | 0.024 | 0.197 | 0.197 | |||||||||
| AIC | 14396.241 | 14042.621 | 14044.621 | |||||||||
| log-Likelihood | -7197.121 | -7012.311 | -7012.310 | |||||||||
###Figure 3: Plots of scaled Schoenfeld residuals against transformed time for each covariate in a model fit to the recidivism data. The solid line is a smoothing spline fit to the plot, with the broken lines representing a ± 2-standard-error band around the fit.
cox.zph(fitcox_adj)
chisq df p
Surgery_binary 13.802 1 0.00020 First_malignant_primary_tumor 1.485 1 0.22300 Agecat 16.463 2 0.00027 Sex 0.298 1 0.58510 Marital 0.694 2 0.70665 Race 3.933 2 0.13996 Surgery_binary:First_malignant_primary_tumor 5.872 1 0.01538 GLOBAL 35.569 10 1e-04
par(mfrow=c(4, 2))
plot(cox.zph(fitcox_adj))
fitplotcox_surgery<-survfit(Surv(srv_time_mon, STAT_REC)~Surgery_binary, data=logplotdat)
ggsurvplot(fitplotcox_surgery,fun="cloglog")