Tugas Kelompok Analisis Ketahanan Hidup SD-A1, Program Studi Teknologi Sains Data, Fakultas Teknologi Maju dan Multidisiplin, Universitas Airlangga, Surabaya.
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
library(survival)
## Warning: package 'survival' was built under R version 4.2.3
library(survminer)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.2.3
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
df <- read_excel("D:/SEMESTER 5/Analisis Ketahanan Hidup/M9&10- Fungsi Survival Parametrik/Dataset1.xlsx")
df
## # A tibble: 42 × 5
## Time Status Rx Log_WBC Sex
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 35 0 0 1.45 1
## 2 34 0 0 1.47 1
## 3 32 0 0 2.2 1
## 4 32 0 0 2.53 1
## 5 25 0 0 1.78 1
## 6 23 1 0 2.57 1
## 7 23 1 1 1.97 1
## 8 22 1 0 2.32 1
## 9 22 1 1 2.73 0
## 10 20 0 0 2.01 1
## # ℹ 32 more rows
# Estimasi lambda dari rata-rata waktu bertahan
lambda_hat <- 1 / mean(df$Time)
# Q-Q Plot
qqplot(qexp(ppoints(length(df$Time)), rate = lambda_hat), df$Time,
main = "Exponential Q-Q Plot",
xlab = "Theoretical Quantiles",
ylab = "Sample Quantiles")
abline(0, 1, col = "red")
Interpretasi: titik-titik pada Q-Q plot mendekati garis lurus, maka data mungkin cocok dengan distribusi eksponensial.
ks_test <- ks.test(df$Time, "pexp", rate = lambda_hat)
## Warning in ks.test.default(df$Time, "pexp", rate = lambda_hat): ties should not
## be present for the Kolmogorov-Smirnov test
print(ks_test)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: df$Time
## D = 0.15808, p-value = 0.2447
## alternative hypothesis: two-sided
H0 : Data waktu bertahan hidup mengikuti distribusi eksponensial.
H1 : Data waktu bertahan hidup tidak mengikuti distribusi eksponensial.
Hasil : p-value di atas 0.05 sehingga dapat ditarik keputusan untuk Gagal Tolak H0.
Interpretasi : Data waktu bertahan hidup mengikuti distribusi eksponensial.
# Tentukan interval waktu bertahan dan hitung frekuensi empiris
breaks <- seq(0, max(df$Time), by = 5)
observed <- hist(df$Time, breaks = breaks, plot = FALSE)$counts
# Hitung frekuensi teoretis berdasarkan distribusi eksponensial
expected <- diff(pexp(breaks, rate = lambda_hat)) * length(df$Time)
# Uji Chi-Square
chisq_test <- chisq.test(observed, p = expected, rescale.p = TRUE)
## Warning in chisq.test(observed, p = expected, rescale.p = TRUE): Chi-squared
## approximation may be incorrect
print(chisq_test)
##
## Chi-squared test for given probabilities
##
## data: observed
## X-squared = 10.691, df = 6, p-value = 0.0984
H0 : Data waktu bertahan hidup mengikuti distribusi eksponensial.
H1 : Data waktu bertahan hidup tidak mengikuti distribusi eksponensial.
Hasil : p-value di atas 0.05 sehingga dapat ditarik keputusan untuk Gagal Tolak H0.
Interpretasi : Data waktu bertahan hidup mengikuti distribusi eksponensial.
lambda <- 1 / mean(df$Time)
ggplot(df, aes(x = Time)) +
geom_histogram(aes(y = ..density..), binwidth = 5, fill = "lightblue", color = "black", alpha = 0.7) +
stat_function(fun = dexp, args = list(rate = lambda), color = "red", size = 1.2) +
labs(title = "Histogram of Survival Time with Exponential Fit",
x = "Survival Time",
y = "Density") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
library(survival)
model <- survreg(Surv(Time, Status) ~ Log_WBC + Sex + Rx,
data = df, dist = 'exponential')
summary(model)
##
## Call:
## survreg(formula = Surv(Time, Status) ~ Log_WBC + Sex + Rx, data = df,
## dist = "exponential")
## Value Std. Error z p
## (Intercept) 5.9741 0.6965 8.58 <2e-16
## Log_WBC -0.8723 0.2212 -3.94 8e-05
## Sex -0.0859 0.3834 -0.22 0.823
## Rx -1.1156 0.4270 -2.61 0.009
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -100.7 Loglik(intercept only)= -116.8
## Chisq= 32.04 on 3 degrees of freedom, p= 5.1e-07
## Number of Newton-Raphson Iterations: 4
## n= 42
surv_fit_rx <- survfit(Surv(Time, Status) ~ Rx, data = df)
ggsurvplot(
surv_fit_rx,
data = df,
conf.int = FALSE,
pval = TRUE,
title = "Kurva Survival Berdasarkan Model AFT Eksp",
legend.title = "Rx",
legend.labs = c("Rx = 0", "Rx = 1"),
xlab = "Waktu (Survival Time)",
ylab = "Probabilitas Survival S(t)",
palette = c("blue", "red")
)
surv_fit_sex <- survfit(Surv(Time, Status) ~ Sex, data = df)
ggsurvplot(
surv_fit_sex,
data = df,
conf.int = FALSE,
pval = TRUE,
title = "Kurva Survival Berdasarkan Model AFT Eksp",
legend.title = "Sex",
legend.labs = c("Sex = 0", "Sex = 1"),
xlab = "Waktu (Survival Time)",
ylab = "Probabilitas Survival S(t)",
palette = c("blue", "red")
)
plot_data <- data.frame(Log_WBC = seq(min(df$Log_WBC), max(df$Log_WBC), length.out = 100))
plot_data$Sex <- 1
plot_data$Rx <- 0
predicted_times <- predict(model, newdata = plot_data, type = "quantile", p = 0.5)
plot_data$Predicted_Time <- predicted_times
ggplot(plot_data, aes(x = Log_WBC, y = Predicted_Time)) +
geom_line() +
labs(title = "Partial Effect of Log_WBC on Predicted Survival Time",
x = "Log_WBC",
y = "Predicted Survival Time (Median)") +
theme_minimal()