library(survival)
library(survminer)
## Warning: package 'survminer' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.4.3
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(ggsurvfit)
## Warning: package 'ggsurvfit' was built under R version 4.4.3
data(nafld, package = 'survival')
head(nafld1)
surv_object <- Surv(nafld1$futime, nafld1$status)
survfit(Surv(futime, status) ~ 1, data = nafld1) |>
ggsurvfit() +
labs(x = 'Days', y = 'Overall survival probability') +
add_confidence_interval()
survfit(surv_object ~ 1, data = nafld1)
## Call: survfit(formula = surv_object ~ 1, data = nafld1)
##
## n events median 0.95LCL 0.95UCL
## [1,] 17549 1364 NA NA NA
summary(survfit(surv_object ~ 1, data = nafld1), times = (547.5))
## Call: survfit(formula = surv_object ~ 1, data = nafld1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 548 16305 246 0.986 0.000909 0.984 0.987
At 1.5 years (548 days), about 98.6% of people survived.
survfit(Surv(futime, status) ~ male, data = nafld1) |>
ggsurvfit() +
add_confidence_interval() +
add_pvalue()
## ! `add_pvalue()` works with objects created with `survfit2()` or
## `tidycmprsk::cuminc()`.
## ℹ `add_pvalue()` has been ignored.
survdiff(Surv(futime, status) ~ as.factor(male), data = nafld1)
## Call:
## survdiff(formula = Surv(futime, status) ~ as.factor(male), data = nafld1)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## as.factor(male)=0 9348 674 736 5.20 11.3
## as.factor(male)=1 8201 690 628 6.09 11.3
##
## Chisq= 11.3 on 1 degrees of freedom, p= 8e-04
There’s a significant difference in survival between males and females (p = 0.0008), meaning sex affects survival in this dataset.
covariates <- c("age", "weight", "height", "bmi")
univariate_results <- list()
for (i in 1:length(covariates)) {
model <- coxph(as.formula(paste("Surv(futime, status) ~", covariates[i])), data = nafld1)
univariate_results[[i]] <- model
cat(covariates[i], "p-value:",
summary(model)$coefficients[,"Pr(>|z|)"], "\n")
}
## age p-value: 0
## weight p-value: 0.0002140244
## height p-value: 1.184007e-14
## bmi p-value: 0.9082656
Age, weight, and height
significantly affect survival, but BMI does not.
full_model <- coxph(surv_object ~ age + weight + height + bmi + as.factor(male), data = nafld1)
summary(full_model)
## Call:
## coxph(formula = surv_object ~ age + weight + height + bmi + as.factor(male),
## data = nafld1)
##
## n= 12588, number of events= 1018
## (4961 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.098330 1.103327 0.002771 35.483 < 2e-16 ***
## weight 0.009734 1.009781 0.012206 0.797 0.425
## height -0.022267 0.977979 0.013629 -1.634 0.102
## bmi -0.011134 0.988927 0.033234 -0.335 0.738
## as.factor(male)1 0.550169 1.733547 0.091513 6.012 1.83e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.1033 0.9063 1.0974 1.109
## weight 1.0098 0.9903 0.9859 1.034
## height 0.9780 1.0225 0.9522 1.004
## bmi 0.9889 1.0112 0.9266 1.055
## as.factor(male)1 1.7335 0.5769 1.4489 2.074
##
## Concordance= 0.828 (se = 0.007 )
## Likelihood ratio test= 1758 on 5 df, p=<2e-16
## Wald test = 1489 on 5 df, p=<2e-16
## Score (logrank) test = 1729 on 5 df, p=<2e-16
In the full model, age and male
significantly increase the risk of death, while weight,
height, and BMI are not significant. The model
fits well with a high concordance of 0.83.
# Quick clean and model
stepwise_model <- step(coxph(Surv(futime, status) ~ age + weight + height + bmi + as.factor(male),
data = na.omit(nafld1)),
direction = "both")
## Start: AIC=15875.52
## Surv(futime, status) ~ age + weight + height + bmi + as.factor(male)
##
## Df AIC
## - bmi 1 15874
## - weight 1 15874
## <none> 15876
## - height 1 15877
## - as.factor(male) 1 15911
## - age 1 17416
##
## Step: AIC=15873.72
## Surv(futime, status) ~ age + weight + height + as.factor(male)
##
## Df AIC
## <none> 15874
## + bmi 1 15876
## - weight 1 15881
## - height 1 15888
## - as.factor(male) 1 15910
## - age 1 17419
summary(stepwise_model)
## Call:
## coxph(formula = Surv(futime, status) ~ age + weight + height +
## as.factor(male), data = na.omit(nafld1))
##
## n= 12562, number of events= 1012
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.098329 1.103326 0.002774 35.452 < 2e-16 ***
## weight 0.005647 1.005663 0.001837 3.074 0.00211 **
## height -0.018697 0.981477 0.004608 -4.058 4.95e-05 ***
## as.factor(male)1 0.564604 1.758752 0.090652 6.228 4.72e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.1033 0.9064 1.0973 1.1093
## weight 1.0057 0.9944 1.0020 1.0093
## height 0.9815 1.0189 0.9727 0.9904
## as.factor(male)1 1.7588 0.5686 1.4725 2.1007
##
## Concordance= 0.828 (se = 0.007 )
## Likelihood ratio test= 1753 on 4 df, p=<2e-16
## Wald test = 1486 on 4 df, p=<2e-16
## Score (logrank) test = 1726 on 4 df, p=<2e-16
The final model shows that age, weight,
height, and being male all affect survival.
Age and being male have the biggest impact. BMI was dropped
because it didn’t help the model. The model is strong with a concordance
of 0.83.