library(survival)
library(ggplot2)
library(ggpubr)
library(survminer)
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(concordance)
options(scipen=999)
data(nafld)
head(nafld1)
## id age male weight height bmi case.id futime status
## 3631 1 57 0 60.0 163 22.69094 10630 6261 0
## 8458 2 67 0 70.4 168 24.88403 14817 624 0
## 6298 3 53 1 105.8 186 30.45354 3 1783 0
## 15398 4 56 1 109.3 170 37.83010 6628 3143 0
## 13261 5 68 1 NA NA NA 1871 1836 1
## 14423 6 39 0 63.9 155 26.61559 7144 1581 0
head(nafld2)
## id days test value
## 135077 1 -459 hdl 75
## 313143 1 -459 chol 75
## 135078 1 183 hdl 64
## 313144 1 183 chol 64
## 135079 1 2030 hdl 74
## 313145 1 2030 chol 74
head(nafld3)
## id days event
## 2 3 0 nafld
## 3 4 -1287 htn
## 4 4 -1287 dyslipidemia
## 5 4 -1226 ang/isc
## 7 4 -90 ang/isc
## 8 5 617 htn
df = nafld1
head(df)
## id age male weight height bmi case.id futime status
## 3631 1 57 0 60.0 163 22.69094 10630 6261 0
## 8458 2 67 0 70.4 168 24.88403 14817 624 0
## 6298 3 53 1 105.8 186 30.45354 3 1783 0
## 15398 4 56 1 109.3 170 37.83010 6628 3143 0
## 13261 5 68 1 NA NA NA 1871 1836 1
## 14423 6 39 0 63.9 155 26.61559 7144 1581 0
Variabel tersensor adalah status. status bernilai 0 artinya dia tersensor.
fit <- survfit(Surv(futime, status)~1, data = df)
surv_table <- data.frame(time=fit$time,
n_risk=fit$n.risk,
n_event=fit$n.event,
n_censor=fit$n.censor,
sur_prob=fit$surv)
head(surv_table)
## time n_risk n_event n_censor sur_prob
## 1 7 17549 0 3 1.000000
## 2 9 17546 0 3 1.000000
## 3 10 17543 1 4 0.999943
## 4 11 17538 1 2 0.999886
## 5 12 17535 0 1 0.999886
## 6 13 17534 0 2 0.999886
Kurva yang menunjukkan peluang survive di setiap waktunya. Berdasarkan Grafik di bawah ini, dapat dikatakan bahwa peluang survive penderita penyakit paru-paru menurun seiring waktu berjalan.
plot(x=surv_table$time,
y=surv_table$sur_prob,
type="l",
ylab="survival probability",
xlab="time")
Jika variabel yang diteliti bertipe kategori, maka digunakan kurva kaplan Meyer. Fungsinya untuk mengetahui apakah terdapat perbedaan survival diantara kategori tersebut. Dalam penelitian ini, ingin diketahui apakah penyakit paru-paru tergantung dari jenis kelaminnya.
ggsurvplot(fit,
surv.median.line = "hv",
ggtheme = theme_bw(),
palette="blue",
legend="none",
xlab = "Time in Days") +
ggtitle("Kaplan Meier Curve nafld")
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
fit2 <- survfit(Surv(futime,status)~male,data=df)
surv_table <- data.frame(time=fit2$time,
n_risk=fit2$n.risk,
n_event = fit2$n.event,
n_censor = fit2$n.censor,
sur_prob=fit2$surv)
head(surv_table)
## time n_risk n_event n_censor sur_prob
## 1 7 9348 0 1 1.0000000
## 2 9 9347 0 2 1.0000000
## 3 10 9345 0 2 1.0000000
## 4 11 9343 1 0 0.9998930
## 5 15 9342 2 0 0.9996789
## 6 18 9340 0 1 0.9996789
fit2 <- survfit(Surv(futime, status)~male, data = df)
ggsurvplot(fit2,
pval = TRUE,
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("red","blue"),
legend="left",
legend.labs = c("Male", "Female"),
xlab = "Time in Days") +
ggtitle("Kaplan Meier Curve Lung Dataset")
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
Jika kaplan meyer hanya untuk variabel kategorik, maka model regresi cox proportional hazard bisa dari variabel numerik maupun kategorik (Lauaknya regresi linier berganda.).
options(scipen=999)
df$male <- as.factor(df$male) # convert to dummy variable
model_coxph <- coxph (Surv(futime, status) ~ age + male + weight + height +
bmi,data=df)
summary(model_coxph)
## Call:
## coxph(formula = Surv(futime, status) ~ age + male + weight +
## height + bmi, data = df)
##
## n= 12588, number of events= 1018
## (4961 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.098330 1.103327 0.002771 35.483 < 0.0000000000000002 ***
## male1 0.550169 1.733547 0.091513 6.012 0.00000000183 ***
## weight 0.009734 1.009781 0.012206 0.797 0.425
## height -0.022267 0.977979 0.013629 -1.634 0.102
## bmi -0.011134 0.988927 0.033234 -0.335 0.738
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.1033 0.9063 1.0974 1.109
## male1 1.7335 0.5769 1.4489 2.074
## weight 1.0098 0.9903 0.9859 1.034
## height 0.9780 1.0225 0.9522 1.004
## bmi 0.9889 1.0112 0.9266 1.055
##
## Concordance= 0.828 (se = 0.007 )
## Likelihood ratio test= 1758 on 5 df, p=<0.0000000000000002
## Wald test = 1489 on 5 df, p=<0.0000000000000002
## Score (logrank) test = 1729 on 5 df, p=<0.0000000000000002
options(scipen=999)
df$male <- as.factor(df$male) # convert to dummy variable
model_exp <- survreg(Surv(futime, status) ~ age + male + weight + height +
bmi,dist="exponential",data=df)
summary(model_exp)
##
## Call:
## survreg(formula = Surv(futime, status) ~ age + male + weight +
## height + bmi, data = df, dist = "exponential")
## Value Std. Error z p
## (Intercept) 12.63031 2.22604 5.67 0.0000000140
## age -0.09252 0.00265 -34.91 < 0.0000000000000002
## male1 -0.53499 0.09073 -5.90 0.0000000037
## weight -0.01029 0.01193 -0.86 0.389
## height 0.02419 0.01340 1.81 0.071
## bmi 0.01422 0.03245 0.44 0.661
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -10669.3 Loglik(intercept only)= -11502.9
## Chisq= 1667.3 on 5 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 7
## n=12588 (4961 observations deleted due to missingness)
concordance(model_coxph)
## Call:
## concordance.coxph(object = model_coxph)
##
## n= 12588
## Concordance= 0.8281 se= 0.007028
## concordant discordant tied.x tied.y tied.xy
## 6415282 1331979 1 146 0
concordance(model_exp)
## Call:
## concordance.survreg(object = model_exp)
##
## n= 12588
## Concordance= 0.8281 se= 0.007029
## concordant discordant tied.x tied.y tied.xy
## 6415221 1332040 1 146 0