#periksa missing value
str(data)
## tibble [312 × 21] (S3: tbl_df/tbl/data.frame)
## $ rownames: num [1:312] 1 2 3 4 5 6 7 8 9 10 ...
## $ id : num [1:312] 1 2 3 4 5 6 7 8 9 10 ...
## $ time : num [1:312] 400 4500 1012 1925 1504 ...
## $ status : num [1:312] 1 0 1 1 1 1 0 1 1 1 ...
## $ trt : num [1:312] 1 1 1 1 2 2 2 2 1 2 ...
## $ age : num [1:312] 58.8 56.4 70.1 54.7 38.1 ...
## $ sex : Factor w/ 2 levels "f","m": 1 1 2 1 1 1 1 1 1 1 ...
## $ ascites : num [1:312] 1 0 0 0 0 0 0 0 0 1 ...
## $ hepato : num [1:312] 1 1 0 1 1 1 1 0 0 0 ...
## $ spiders : num [1:312] 1 1 0 1 1 0 0 0 1 1 ...
## $ edema : num [1:312] 1 0 0.5 0.5 0 0 0 0 0 1 ...
## $ bili : num [1:312] 14.5 1.1 1.4 1.8 3.4 0.8 1 0.3 3.2 12.6 ...
## $ chol : num [1:312] 261 302 176 244 279 248 322 280 562 200 ...
## $ albumin : num [1:312] 2.6 4.14 3.48 2.54 3.53 3.98 4.09 4 3.08 2.74 ...
## $ copper : num [1:312] 156 54 210 64 143 50 52 52 79 140 ...
## $ alk.phos: num [1:312] 1718 7395 516 6122 671 ...
## $ ast : num [1:312] 137.9 113.5 96.1 60.6 113.2 ...
## $ trig : num [1:312] 172 88 55 92 72 63 213 189 88 143 ...
## $ platelet: num [1:312] 190 221 151 183 136 261 204 373 251 302 ...
## $ protime : num [1:312] 12.2 10.6 12 10.3 10.9 11 9.7 11 11 11.5 ...
## $ stage : num [1:312] 4 3 4 4 3 3 3 3 2 4 ...
sapply(data, function(x) sum(is.na(x)))
## rownames id time status trt age sex ascites
## 0 0 0 0 0 0 0 0
## hepato spiders edema bili chol albumin copper alk.phos
## 0 0 0 0 0 0 0 0
## ast trig platelet protime stage
## 0 0 0 0 0
#deskripsi karakteristik pasien
summary(data$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 26.28 42.24 49.79 50.02 56.71 78.44
# Pastikan sex adalah faktor
data$sex <- factor(data$sex, labels = c("Female", "Male"))
table(data$sex)
##
## Female Male
## 276 36
# Jika punya stage
if("stage" %in% names(data)) {
table(data$stage)
}
##
## 1 2 3 4
## 16 67 120 109
#Kaplan Meier #Objek Survival
surv_obj <- Surv(time = data$time, event = data$status)
#KM Curve
km_overall <- survfit(surv_obj ~ 1, data = data)
ggsurvplot(
km_overall,
data = data,
conf.int = TRUE,
risk.table = TRUE,
title = "Kaplan–Meier Survival Curve (Overall)",
ggtheme = theme_minimal()
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • colour : "Strata"
#KM SEX
km_sex <- survfit(surv_obj ~ sex, data = data)
ggsurvplot(
km_sex,
data = data,
conf.int = TRUE,
pval = TRUE,
risk.table = TRUE,
title = "KM Curve by Sex",
ggtheme = theme_minimal()
)
## Ignoring unknown labels:
## • colour : "Strata"
#LOGRANK
survdiff(surv_obj ~ sex, data = data)
## Call:
## survdiff(formula = surv_obj ~ sex, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=Female 276 119 127.4 0.554 4.85
## sex=Male 36 25 16.6 4.248 4.85
##
## Chisq= 4.9 on 1 degrees of freedom, p= 0.03
#Cox Proportional hazard
cox_multi <- coxph(Surv(time, status) ~ age + sex + albumin, data = data)
summary(cox_multi)
## Call:
## coxph(formula = Surv(time, status) ~ age + sex + albumin, data = data)
##
## n= 312, number of events= 144
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.014056 1.014155 0.008631 1.629 0.1034
## sexMale 0.557374 1.746081 0.226510 2.461 0.0139 *
## albumin -1.611318 0.199624 0.197766 -8.148 3.71e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0142 0.9860 0.9971 1.0315
## sexMale 1.7461 0.5727 1.1201 2.7219
## albumin 0.1996 5.0094 0.1355 0.2941
##
## Concordance= 0.715 (se = 0.023 )
## Likelihood ratio test= 70.84 on 3 df, p=3e-15
## Wald test = 82.89 on 3 df, p=<2e-16
## Score (logrank) test = 80.76 on 3 df, p=<2e-16
#HR CI
exp(cbind(HR = coef(cox_multi), confint(cox_multi)))
## HR 2.5 % 97.5 %
## age 1.0141548 0.9971441 1.031456
## sexMale 1.7460812 1.1201045 2.721888
## albumin 0.1996243 0.1354798 0.294139
#Pemeriksaan PH
ph_test <- cox.zph(cox_multi)
ph_test
## chisq df p
## age 0.4829 1 0.487
## sex 0.0997 1 0.752
## albumin 3.0182 1 0.082
## GLOBAL 3.2816 3 0.350
#Plot PH
plot(ph_test)
#Visualisasi #forst plot HI CI
library(broom)
## Warning: package 'broom' was built under R version 4.4.3
library(ggplot2)
library(dplyr)
# Model Cox
cox_multi <- coxph(Surv(time, status) ~ age + sex + albumin, data = data)
# Ambil HR + CI dengan broom
forest_df <- tidy(cox_multi, exponentiate = TRUE, conf.int = TRUE)
# Hapus intercept
forest_df <- forest_df %>% filter(term != "(Intercept)")
# Forest Plot
ggplot(forest_df, aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
geom_pointrange() +
geom_hline(yintercept = 1, linetype = "dashed") +
coord_flip() +
labs(
title = "Forest Plot – Hazard Ratio (HR) with 95% CI",
x = "Variabel",
y = "Hazard Ratio (HR)"
) +
theme_minimal()
#Cumulative error
km_fit <- survfit(surv_obj ~ sex, data = data)
ggsurvplot(
km_fit,
data = data,
fun = "cumhaz",
conf.int = TRUE,
pval = TRUE,
title = "Cumulative Hazard Plot by Sex",
ggtheme = theme_minimal()
)