Anggota Kelompok

  1. Salma Ayu Hanifah (164221012)
  2. Sinta Dian Monica (164221018)

Tugas Kelompok Analisis Ketahanan Hidup SD-A1, Program Studi Teknologi Sains Data, Fakultas Teknologi Maju dan Multidisiplin, Universitas Airlangga, Surabaya.

Import Library

library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
library(survival)
## Warning: package 'survival' was built under R version 4.2.3
library(survminer)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.2.3
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma

Dataset

df <- read_excel("D:/SEMESTER 5/Analisis Ketahanan Hidup/M9&10- Fungsi Survival Parametrik/Dataset1.xlsx")
df
## # A tibble: 42 × 5
##     Time Status    Rx Log_WBC   Sex
##    <dbl>  <dbl> <dbl>   <dbl> <dbl>
##  1    35      0     0    1.45     1
##  2    34      0     0    1.47     1
##  3    32      0     0    2.2      1
##  4    32      0     0    2.53     1
##  5    25      0     0    1.78     1
##  6    23      1     0    2.57     1
##  7    23      1     1    1.97     1
##  8    22      1     0    2.32     1
##  9    22      1     1    2.73     0
## 10    20      0     0    2.01     1
## # ℹ 32 more rows

Pengujian Statistik

Plot Q-Q (Quantile-Quantile)

# Estimasi lambda dari rata-rata waktu bertahan
lambda_hat <- 1 / mean(df$Time)

# Q-Q Plot
qqplot(qexp(ppoints(length(df$Time)), rate = lambda_hat), df$Time, 
       main = "Exponential Q-Q Plot", 
       xlab = "Theoretical Quantiles", 
       ylab = "Sample Quantiles")
abline(0, 1, col = "red") 

Interpretasi: titik-titik pada Q-Q plot mendekati garis lurus, maka data mungkin cocok dengan distribusi eksponensial.

Uji Kolmogorov-Smirnov

ks_test <- ks.test(df$Time, "pexp", rate = lambda_hat)
## Warning in ks.test.default(df$Time, "pexp", rate = lambda_hat): ties should not
## be present for the Kolmogorov-Smirnov test
print(ks_test)
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  df$Time
## D = 0.15808, p-value = 0.2447
## alternative hypothesis: two-sided

H0 : Data waktu bertahan hidup mengikuti distribusi eksponensial.

H1 : Data waktu bertahan hidup tidak mengikuti distribusi eksponensial.

Hasil : p-value di atas 0.05 sehingga dapat ditarik keputusan untuk Gagal Tolak H0.

Interpretasi : Data waktu bertahan hidup mengikuti distribusi eksponensial.

Uji Chi-Square Goodness-of-Fit

# Tentukan interval waktu bertahan dan hitung frekuensi empiris
breaks <- seq(0, max(df$Time), by = 5) 
observed <- hist(df$Time, breaks = breaks, plot = FALSE)$counts

# Hitung frekuensi teoretis berdasarkan distribusi eksponensial
expected <- diff(pexp(breaks, rate = lambda_hat)) * length(df$Time)

# Uji Chi-Square
chisq_test <- chisq.test(observed, p = expected, rescale.p = TRUE)
## Warning in chisq.test(observed, p = expected, rescale.p = TRUE): Chi-squared
## approximation may be incorrect
print(chisq_test)
## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 10.691, df = 6, p-value = 0.0984

H0 : Data waktu bertahan hidup mengikuti distribusi eksponensial.

H1 : Data waktu bertahan hidup tidak mengikuti distribusi eksponensial.

Hasil : p-value di atas 0.05 sehingga dapat ditarik keputusan untuk Gagal Tolak H0.

Interpretasi : Data waktu bertahan hidup mengikuti distribusi eksponensial.

Visualisasi Survival Time with Exponential Fit

lambda <- 1 / mean(df$Time)
ggplot(df, aes(x = Time)) +
  geom_histogram(aes(y = ..density..), binwidth = 5, fill = "lightblue", color = "black", alpha = 0.7) +
  stat_function(fun = dexp, args = list(rate = lambda), color = "red", size = 1.2) +
  labs(title = "Histogram of Survival Time with Exponential Fit",
       x = "Survival Time",
       y = "Density") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Model Survival Parametrik Eksponensial (AFT)

library(survival)

model <- survreg(Surv(Time, Status) ~ Log_WBC + Sex + Rx,
                           data = df, dist = 'exponential')
summary(model)
## 
## Call:
## survreg(formula = Surv(Time, Status) ~ Log_WBC + Sex + Rx, data = df, 
##     dist = "exponential")
##               Value Std. Error     z      p
## (Intercept)  5.9741     0.6965  8.58 <2e-16
## Log_WBC     -0.8723     0.2212 -3.94  8e-05
## Sex         -0.0859     0.3834 -0.22  0.823
## Rx          -1.1156     0.4270 -2.61  0.009
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -100.7   Loglik(intercept only)= -116.8
##  Chisq= 32.04 on 3 degrees of freedom, p= 5.1e-07 
## Number of Newton-Raphson Iterations: 4 
## n= 42

Visualisasi Kurva Survival Eksponensial

surv_fit_rx <- survfit(Surv(Time, Status) ~ Rx, data = df)

ggsurvplot(
  surv_fit_rx,
  data = df,
  conf.int = FALSE,
  pval = TRUE,
  title = "Kurva Survival Berdasarkan Model AFT Eksp",
  legend.title = "Rx",
  legend.labs = c("Rx = 0", "Rx = 1"),
  xlab = "Waktu (Survival Time)",
  ylab = "Probabilitas Survival S(t)",
  palette = c("blue", "red")
)

Visualisasi Kurva Survival Berdasarkan Jenis Kelamin (Sex)

surv_fit_sex <- survfit(Surv(Time, Status) ~ Sex, data = df)

ggsurvplot(
  surv_fit_sex,
  data = df,
  conf.int = FALSE,
  pval = TRUE,
  title = "Kurva Survival Berdasarkan Model AFT Eksp",
  legend.title = "Sex",
  legend.labs = c("Sex = 0", "Sex = 1"),
  xlab = "Waktu (Survival Time)",
  ylab = "Probabilitas Survival S(t)",
  palette = c("blue", "red")
)

Visualisasi Efek Parsial Log_WBC terhadap Prediksi Waktu Survival

plot_data <- data.frame(Log_WBC = seq(min(df$Log_WBC), max(df$Log_WBC), length.out = 100))
plot_data$Sex <- 1  
plot_data$Rx <- 0 

predicted_times <- predict(model, newdata = plot_data, type = "quantile", p = 0.5)
plot_data$Predicted_Time <- predicted_times

ggplot(plot_data, aes(x = Log_WBC, y = Predicted_Time)) +
  geom_line() +
  labs(title = "Partial Effect of Log_WBC on Predicted Survival Time",
       x = "Log_WBC",
       y = "Predicted Survival Time (Median)") +
  theme_minimal()