#periksa missing value

str(data)
## tibble [312 × 21] (S3: tbl_df/tbl/data.frame)
##  $ rownames: num [1:312] 1 2 3 4 5 6 7 8 9 10 ...
##  $ id      : num [1:312] 1 2 3 4 5 6 7 8 9 10 ...
##  $ time    : num [1:312] 400 4500 1012 1925 1504 ...
##  $ status  : num [1:312] 1 0 1 1 1 1 0 1 1 1 ...
##  $ trt     : num [1:312] 1 1 1 1 2 2 2 2 1 2 ...
##  $ age     : num [1:312] 58.8 56.4 70.1 54.7 38.1 ...
##  $ sex     : Factor w/ 2 levels "f","m": 1 1 2 1 1 1 1 1 1 1 ...
##  $ ascites : num [1:312] 1 0 0 0 0 0 0 0 0 1 ...
##  $ hepato  : num [1:312] 1 1 0 1 1 1 1 0 0 0 ...
##  $ spiders : num [1:312] 1 1 0 1 1 0 0 0 1 1 ...
##  $ edema   : num [1:312] 1 0 0.5 0.5 0 0 0 0 0 1 ...
##  $ bili    : num [1:312] 14.5 1.1 1.4 1.8 3.4 0.8 1 0.3 3.2 12.6 ...
##  $ chol    : num [1:312] 261 302 176 244 279 248 322 280 562 200 ...
##  $ albumin : num [1:312] 2.6 4.14 3.48 2.54 3.53 3.98 4.09 4 3.08 2.74 ...
##  $ copper  : num [1:312] 156 54 210 64 143 50 52 52 79 140 ...
##  $ alk.phos: num [1:312] 1718 7395 516 6122 671 ...
##  $ ast     : num [1:312] 137.9 113.5 96.1 60.6 113.2 ...
##  $ trig    : num [1:312] 172 88 55 92 72 63 213 189 88 143 ...
##  $ platelet: num [1:312] 190 221 151 183 136 261 204 373 251 302 ...
##  $ protime : num [1:312] 12.2 10.6 12 10.3 10.9 11 9.7 11 11 11.5 ...
##  $ stage   : num [1:312] 4 3 4 4 3 3 3 3 2 4 ...
sapply(data, function(x) sum(is.na(x)))
## rownames       id     time   status      trt      age      sex  ascites 
##        0        0        0        0        0        0        0        0 
##   hepato  spiders    edema     bili     chol  albumin   copper alk.phos 
##        0        0        0        0        0        0        0        0 
##      ast     trig platelet  protime    stage 
##        0        0        0        0        0

#deskripsi karakteristik pasien

summary(data$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   26.28   42.24   49.79   50.02   56.71   78.44
# Pastikan sex adalah faktor

data$sex <- factor(data$sex, labels = c("Female", "Male"))
table(data$sex)
## 
## Female   Male 
##    276     36
# Jika punya stage

if("stage" %in% names(data)) {
table(data$stage)
}
## 
##   1   2   3   4 
##  16  67 120 109

#Kaplan Meier #Objek Survival

surv_obj <- Surv(time = data$time, event = data$status)

#KM Curve

km_overall <- survfit(surv_obj ~ 1, data = data)

ggsurvplot(
km_overall,
data = data,
conf.int = TRUE,
risk.table = TRUE,
title = "Kaplan–Meier Survival Curve (Overall)",
ggtheme = theme_minimal()
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • colour : "Strata"

#KM SEX

km_sex <- survfit(surv_obj ~ sex, data = data)

ggsurvplot(
km_sex,
data = data,
conf.int = TRUE,
pval = TRUE,
risk.table = TRUE,
title = "KM Curve by Sex",
ggtheme = theme_minimal()
)
## Ignoring unknown labels:
## • colour : "Strata"

#LOGRANK

survdiff(surv_obj ~ sex, data = data)
## Call:
## survdiff(formula = surv_obj ~ sex, data = data)
## 
##              N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=Female 276      119    127.4     0.554      4.85
## sex=Male    36       25     16.6     4.248      4.85
## 
##  Chisq= 4.9  on 1 degrees of freedom, p= 0.03

#Cox Proportional hazard

cox_multi <- coxph(Surv(time, status) ~ age + sex + albumin, data = data)
summary(cox_multi)
## Call:
## coxph(formula = Surv(time, status) ~ age + sex + albumin, data = data)
## 
##   n= 312, number of events= 144 
## 
##              coef exp(coef)  se(coef)      z Pr(>|z|)    
## age      0.014056  1.014155  0.008631  1.629   0.1034    
## sexMale  0.557374  1.746081  0.226510  2.461   0.0139 *  
## albumin -1.611318  0.199624  0.197766 -8.148 3.71e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## age        1.0142     0.9860    0.9971    1.0315
## sexMale    1.7461     0.5727    1.1201    2.7219
## albumin    0.1996     5.0094    0.1355    0.2941
## 
## Concordance= 0.715  (se = 0.023 )
## Likelihood ratio test= 70.84  on 3 df,   p=3e-15
## Wald test            = 82.89  on 3 df,   p=<2e-16
## Score (logrank) test = 80.76  on 3 df,   p=<2e-16

#HR CI

exp(cbind(HR = coef(cox_multi), confint(cox_multi)))
##                HR     2.5 %   97.5 %
## age     1.0141548 0.9971441 1.031456
## sexMale 1.7460812 1.1201045 2.721888
## albumin 0.1996243 0.1354798 0.294139

#Pemeriksaan PH

ph_test <- cox.zph(cox_multi)
ph_test
##          chisq df     p
## age     0.4829  1 0.487
## sex     0.0997  1 0.752
## albumin 3.0182  1 0.082
## GLOBAL  3.2816  3 0.350

#Plot PH

plot(ph_test)

#Visualisasi #forst plot HI CI

library(broom)
## Warning: package 'broom' was built under R version 4.4.3
library(ggplot2)
library(dplyr)

# Model Cox
cox_multi <- coxph(Surv(time, status) ~ age + sex + albumin, data = data)

# Ambil HR + CI dengan broom
forest_df <- tidy(cox_multi, exponentiate = TRUE, conf.int = TRUE)

# Hapus intercept
forest_df <- forest_df %>% filter(term != "(Intercept)")

# Forest Plot
ggplot(forest_df, aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() +
  geom_hline(yintercept = 1, linetype = "dashed") +
  coord_flip() +
  labs(
    title = "Forest Plot – Hazard Ratio (HR) with 95% CI",
    x = "Variabel",
    y = "Hazard Ratio (HR)"
  ) +
  theme_minimal()

#Cumulative error

km_fit <- survfit(surv_obj ~ sex, data = data)

ggsurvplot(
km_fit,
data = data,
fun = "cumhaz",
conf.int = TRUE,
pval = TRUE,
title = "Cumulative Hazard Plot by Sex",
ggtheme = theme_minimal()
)