UAS ATH
Import Data
## tibble [11,166 × 15] (S3: tbl_df/tbl/data.frame)
## $ KODE : num [1:11166] 11 11 11 11 11 11 11 11 11 11 ...
## $ PROVINSI : chr [1:11166] "Aceh" "Aceh" "Aceh" "Aceh" ...
## $ TIME : num [1:11166] 5 2 2 1 1 1 1 2 15 4 ...
## $ STATUS : num [1:11166] 1 1 1 0 0 0 1 0 0 0 ...
## $ KRT : num [1:11166] 0 0 0 0 0 0 0 0 0 0 ...
## $ JK : num [1:11166] 1 1 1 0 0 0 1 0 0 0 ...
## $ UMUR : num [1:11166] 23 22 23 21 22 18 18 18 20 24 ...
## $ WILAYAH : num [1:11166] 1 0 0 1 1 0 1 0 0 0 ...
## $ KAWIN : num [1:11166] 1 1 1 1 1 1 1 1 0 1 ...
## $ DIDIK : num [1:11166] 1 1 1 1 1 1 1 1 1 1 ...
## $ SERTIF_LATIH : num [1:11166] 0 0 0 0 0 0 0 0 0 0 ...
## $ SERTIF_MAGANG: num [1:11166] 0 0 0 0 0 0 0 0 0 0 ...
## $ MRISEN : num [1:11166] 0 0 0 0 0 0 0 0 0 0 ...
## $ UMK : chr [1:11166] "tinggi" "tinggi" "tinggi" "tinggi" ...
## $ TPT : num [1:11166] 4.73 4.73 4.73 4.73 4.73 4.73 5 4.73 5 5 ...
## Warning: package 'Hmisc' was built under R version 4.5.1
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
label(data$KODE) <- "Kode Provinsi"
label(data$PROVINSI) <- "Nama Provinsi"
label(data$TIME) <- "Durasi waktu dari lulus sekolah/kuliah sampai dapat pekerjaan formal pertama kali (bulan)"
label(data$STATUS) <- "Status event/sensor pengamatan (0=sensor, 1=event)"
label(data$KRT) <- "Status Kepala Rumah Tangga (0=bukan KRT, 1=KRT)"
label(data$JK) <- "Jenis Kelamin (0=perempuan, 1=laki-laki)"
label(data$WILAYAH) <- "Kategori Wilayah Tempat Tinggal (0=perdesaan, 1=perkotaan)"
label(data$KAWIN) <- "Status Perkawinan (0=pernah kawin, 1=belum pernah kawin)"
label(data$DIDIK) <- "Pendidikan Tertinggi (1=SMA/Sederajat, 2=D1/D2/D3, 3=D4/S1)"
label(data$SERTIF_LATIH) <- "Kepemilikan Sertifikat Pelatihan (0=tidak memiliki, 1=memiliki)"
label(data$SERTIF_MAGANG) <- "Kepemilikan Sertifikat Magang (0=tidak memiliki, 1=memiliki)"
label(data$MRISEN) <- "Status Migrasi Risen (0=bukan, 1=ya)"
label(data$UMK) <- "Kategori Upah Minimum Kab/Kota (rendah, sedang, tinggi)"
label(data$TPT) <- "Tingkat Pengangguran Terbuka (%)"# Konversi variabel-variabel ke faktor dengan level yang sesuai
data$KRT <- factor(data$KRT, levels = c(0, 1), labels = c("bukan KRT", "KRT"))
data$JK <- factor(data$JK, levels = c(0, 1), labels = c("Perempuan", "Laki-laki"))
data$WILAYAH <- factor(data$WILAYAH, levels = c(0, 1), labels = c("Perdesaan", "Perkotaan"))
data$KAWIN <- factor(data$KAWIN, levels = c(0, 1), labels = c("Pernah Kawin", "Belum Pernah Kawin"))
data$DIDIK <- factor(data$DIDIK, levels = c(1, 2, 3), labels = c("SMA/Sederajat", "D1/D2/D3", "D4/S1"))
data$SERTIF_LATIH <- factor(data$SERTIF_LATIH, levels = c(0, 1), labels = c("Tidak Memiliki", "Memiliki"))
data$SERTIF_MAGANG <- factor(data$SERTIF_MAGANG, levels = c(0, 1), labels = c("Tidak Memiliki", "Memiliki"))
data$MRISEN <- factor(data$MRISEN, levels = c(0, 1), labels = c("Bukan Migran", "Migran"))
# Jika perlu, pastikan juga variabel UMK sudah jadi faktor (kalau belum)
data$UMK <- factor(data$UMK, levels = c("rendah", "sedang", "tinggi"))## tibble [11,166 × 15] (S3: tbl_df/tbl/data.frame)
## $ KODE : 'labelled' num [1:11166] 11 11 11 11 11 11 11 11 11 11 ...
## ..- attr(*, "label")= chr "Kode Provinsi"
## $ PROVINSI : 'labelled' chr [1:11166] "Aceh" "Aceh" "Aceh" "Aceh" ...
## ..- attr(*, "label")= chr "Nama Provinsi"
## $ TIME : 'labelled' num [1:11166] 5 2 2 1 1 1 1 2 15 4 ...
## ..- attr(*, "label")= chr "Durasi waktu dari lulus sekolah/kuliah sampai dapat pekerjaan formal pertama kali (bulan)"
## $ STATUS : 'labelled' num [1:11166] 1 1 1 0 0 0 1 0 0 0 ...
## ..- attr(*, "label")= chr "Status event/sensor pengamatan (0=sensor, 1=event)"
## $ KRT : Factor w/ 2 levels "bukan KRT","KRT": 1 1 1 1 1 1 1 1 1 1 ...
## $ JK : Factor w/ 2 levels "Perempuan","Laki-laki": 2 2 2 1 1 1 2 1 1 1 ...
## $ UMUR : num [1:11166] 23 22 23 21 22 18 18 18 20 24 ...
## $ WILAYAH : Factor w/ 2 levels "Perdesaan","Perkotaan": 2 1 1 2 2 1 2 1 1 1 ...
## $ KAWIN : Factor w/ 2 levels "Pernah Kawin",..: 2 2 2 2 2 2 2 2 1 2 ...
## $ DIDIK : Factor w/ 3 levels "SMA/Sederajat",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SERTIF_LATIH : Factor w/ 2 levels "Tidak Memiliki",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SERTIF_MAGANG: Factor w/ 2 levels "Tidak Memiliki",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ MRISEN : Factor w/ 2 levels "Bukan Migran",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ UMK : Factor w/ 3 levels "rendah","sedang",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ TPT : 'labelled' num [1:11166] 4.73 4.73 4.73 4.73 4.73 4.73 5 4.73 5 5 ...
## ..- attr(*, "label")= chr "Tingkat Pengangguran Terbuka (%)"
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
urutan_tpt <- data %>%
group_by(PROVINSI) %>%
summarise(Rata_TPT = mean(TPT, na.rm = TRUE)) %>%
arrange(desc(Rata_TPT))
# Tampilkan hasil
print(urutan_tpt)## # A tibble: 33 × 2
## PROVINSI Rata_TPT
## <labelled> <dbl>
## 1 Jawa Barat 7.66
## 2 Banten 7.66
## 3 Maluku 7.26
## 4 DKI Jakarta 6.69
## 5 Sulawesi Utara 6.41
## 6 Aceh 6.33
## 7 Sumatera Utara 5.93
## 8 Sumatera Barat 5.70
## 9 Papua 5.68
## 10 Papua Barat 5.65
## # ℹ 23 more rows
Selecting Data
## tibble [378 × 15] (S3: tbl_df/tbl/data.frame)
## $ KODE : 'labelled' num [1:378] 36 36 36 36 36 36 36 36 36 36 ...
## ..- attr(*, "label")= chr "Kode Provinsi"
## $ PROVINSI : 'labelled' chr [1:378] "Banten" "Banten" "Banten" "Banten" ...
## ..- attr(*, "label")= chr "Nama Provinsi"
## $ TIME : 'labelled' num [1:378] 19 27 1 5 2 20 1 17 2 16 ...
## ..- attr(*, "label")= chr "Durasi waktu dari lulus sekolah/kuliah sampai dapat pekerjaan formal pertama kali (bulan)"
## $ STATUS : 'labelled' num [1:378] 0 0 0 1 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "Status event/sensor pengamatan (0=sensor, 1=event)"
## $ KRT : Factor w/ 2 levels "bukan KRT","KRT": 1 1 1 1 1 1 1 1 1 1 ...
## $ JK : Factor w/ 2 levels "Perempuan","Laki-laki": 2 1 2 2 2 1 2 1 1 1 ...
## $ UMUR : num [1:378] 23 22 19 23 19 20 18 21 18 24 ...
## $ WILAYAH : Factor w/ 2 levels "Perdesaan","Perkotaan": 1 1 1 2 2 1 2 1 1 1 ...
## $ KAWIN : Factor w/ 2 levels "Pernah Kawin",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ DIDIK : Factor w/ 3 levels "SMA/Sederajat",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SERTIF_LATIH : Factor w/ 2 levels "Tidak Memiliki",..: 1 1 1 1 1 1 1 1 2 1 ...
## $ SERTIF_MAGANG: Factor w/ 2 levels "Tidak Memiliki",..: 1 1 2 1 1 1 1 1 2 1 ...
## $ MRISEN : Factor w/ 2 levels "Bukan Migran",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ UMK : Factor w/ 3 levels "rendah","sedang",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ TPT : 'labelled' num [1:378] 9.05 9.05 9.05 9.05 9.05 9.05 9.05 9.05 9.05 9.05 ...
## ..- attr(*, "label")= chr "Tingkat Pengangguran Terbuka (%)"
#Eksplorasi Data
library(ggplot2)
# Ambil nama variabel bertipe factor
kategori_vars <- names(mydata)[sapply(mydata, is.factor)]
print(kategori_vars)## [1] "KRT" "JK" "WILAYAH" "KAWIN"
## [5] "DIDIK" "SERTIF_LATIH" "SERTIF_MAGANG" "MRISEN"
## [9] "UMK"
library(ggplot2)
library(dplyr)
# Fungsi untuk pie chart
plot_pie <- function(varname) {
df <- mydata %>%
count(!!sym(varname)) %>%
mutate(persen = n / sum(n) * 100,
label = paste0(!!sym(varname), "\n(", round(persen, 1), "%)"))
ggplot(df, aes(x = "", y = n, fill = !!sym(varname))) +
geom_col(width = 1) +
coord_polar(theta = "y") +
theme_void() +
labs(title = paste("Distribusi", varname)) +
geom_text(aes(label = label), position = position_stack(vjust = 0.5), size = 3)
}##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Buat list semua plot pie
pie_plots <- lapply(kategori_vars, plot_pie)
# Tampilkan semua pie chart, 2 per baris
do.call(grid.arrange, c(pie_plots, ncol = 2))library(ggplot2)
library(dplyr)
library(scales)
library(gridExtra)
# 1. Tangkap semua variabel faktor kecuali STATUS
factor_vars <- names(mydata)[sapply(mydata, is.factor)]
factor_vars <- setdiff(factor_vars, "STATUS")
# 2. Fungsi membuat bar chart proporsi dengan label persen
plot_bar_by_status <- function(varname) {
# Hitung proporsi manual
df_prop <- mydata %>%
group_by(across(all_of(varname)), STATUS) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(across(all_of(varname))) %>%
mutate(persen = n / sum(n))
ggplot(df_prop, aes_string(x = varname, y = "persen", fill = "STATUS")) +
geom_col(position = "stack") +
geom_text(aes(label = paste0(round(persen * 100, 1), "%")),
position = position_stack(vjust = 0.5),
color = "white", size = 3) +
scale_y_continuous(labels = percent_format()) +
theme_minimal() +
labs(
title = paste("Distribusi", varname, "berdasarkan STATUS"),
x = varname,
y = "Proporsi"
) +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
}
# 3. Buat list plot untuk semua faktor
bar_by_status <- lapply(factor_vars, plot_bar_by_status)## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Variabel numerik
num_vars <- c("TIME", "UMUR", "TPT")
# Plot semua boxplot dalam satu list
boxplots <- lapply(num_vars, function(v) {
ggplot(mydata, aes_string(y = v)) +
geom_boxplot(fill = "skyblue") +
theme_minimal() +
labs(title = paste("Boxplot", v), y = v)
})
# Tampilkan semua boxplot
do.call(grid.arrange, c(boxplots, ncol = 3))box_by_status <- lapply(num_vars, function(v) {
ggplot(mydata, aes_string(x = "STATUS", y = v, fill = "STATUS")) +
geom_boxplot() +
theme_minimal() +
labs(title = paste(v, "berdasarkan STATUS"), x = "STATUS", y = v)
})
do.call(grid.arrange, c(box_by_status, ncol = 3))## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Analisis Deskriptif
Ringkasan Data
library(dplyr)
library(tidyr)
# Fungsi untuk membuat tabel frekuensi + persentase
make_wide_table <- function(varname, label_name) {
mydata %>%
count(!!sym(varname)) %>%
mutate(
n_pct = paste0(n, " (", round(100 * n / sum(n), 1), "%)")
) %>%
select(Category = !!sym(varname), n_pct) %>%
pivot_wider(names_from = Category, values_from = n_pct) %>%
mutate(Variable = label_name) %>%
select(Variable, everything())
}
# Daftar nama variabel
variabel_list <- c("STATUS","KRT", "WILAYAH", "KAWIN", "DIDIK", "JK",
"SERTIF_LATIH", "SERTIF_MAGANG", "MRISEN", "UMK")
# Gunakan nama label sama dengan nama variabel (bisa disesuaikan kalau perlu)
label_list <- variabel_list
# Buat tabel ringkas
tabel_ringkas_lain <- purrr::map2_dfr(variabel_list, label_list, make_wide_table)
# Tampilkan tabel
print(tabel_ringkas_lain)## # A tibble: 10 × 21
## Variable `0` `1` `bukan KRT` KRT Perdesaan Perkotaan `Pernah Kawin`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 STATUS 290 … 88 (… <NA> <NA> <NA> <NA> <NA>
## 2 KRT <NA> <NA> 375 (99.2%) 3 (0… <NA> <NA> <NA>
## 3 WILAYAH <NA> <NA> <NA> <NA> 63 (16.7… 315 (83.… <NA>
## 4 KAWIN <NA> <NA> <NA> <NA> <NA> <NA> 8 (2.1%)
## 5 DIDIK <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 6 JK <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 7 SERTIF_LATIH <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 8 SERTIF_MAGA… <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 9 MRISEN <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 10 UMK <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## # ℹ 13 more variables: `Belum Pernah Kawin` <chr>, `SMA/Sederajat` <chr>,
## # `D1/D2/D3` <chr>, `D4/S1` <chr>, Perempuan <chr>, `Laki-laki` <chr>,
## # `Tidak Memiliki` <chr>, Memiliki <chr>, `Bukan Migran` <chr>, Migran <chr>,
## # rendah <chr>, sedang <chr>, tinggi <chr>
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 6.000 9.619 14.000 72.000
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.00 19.00 20.00 20.31 22.00 24.00
Kaplan Maiyer
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
##
## Attaching package: 'eha'
## The following objects are masked from 'package:flexsurv':
##
## dgompertz, dllogis, hgompertz, Hgompertz, hllogis, Hllogis, hlnorm,
## Hlnorm, hweibull, Hweibull, pgompertz, pllogis, qgompertz, qllogis,
## rgompertz, rllogis
## The following object is masked from 'package:Hmisc':
##
## logrank
library(haven) # jika impor dari .sav
# Asumsi dataset sudah dibersihkan dan bernama `data`
# Buat objek Surv
mydata$s_obj <- Surv(time = mydata$TIME, event = mydata$STATUS)# Library yang diperlukan
library(survival)
library(survminer)
# Buat model Kaplan-Meier
km_fit <- survfit(Surv(time = mydata$TIME, event = mydata$STATUS) ~ 1)
# Plot KM yang telah ditingkatkan
# Plot KM yang telah ditingkatkan - tanpa p-value karena model tidak memiliki grup
ggsurvplot(
fit = km_fit,
data = mydata,
conf.int = TRUE, # Interval kepercayaan
pval = FALSE, # Tidak perlu nilai p karena tidak ada grup dibandingkan
risk.table = TRUE, # Tabel risiko
risk.table.title = "Jumlah individu yang berisiko per waktu",
risk.table.y.text.col = TRUE,
risk.table.y.text = FALSE,
surv.median.line = "hv",
xlab = "Waktu (bulan)",
ylab = "Probabilitas Survival",
ggtheme = theme_bw(base_size = 14),
palette = c("#0073C2FF"),
censor.shape = 124,
censor.size = 3,
legend = "bottom",
legend.labs = c("Keseluruhan Sample"),
break.time.by = 6
) +
labs(
title = "Kurva Kaplan-Meier Keseluruhan",
subtitle = "Dengan interval kepercayaan dan garis median",
caption = "Sumber: Sakernas 2023"
)Analisis Inferensia
Uji Log rank
library(survival)
library(survminer)
variabel_list <- c("STATUS","KRT", "WILAYAH", "KAWIN", "DIDIK", "JK",
"SERTIF_LATIH", "SERTIF_MAGANG", "MRISEN", "UMK")
# Loop eksplorasi Kaplan-Meier dan log-rank
for (v in variabel_list) {
cat("\n\n==============================\n")
cat("Variable:", v, "\n")
# Coba-catch agar error tidak hentikan loop
tryCatch({
# Survival object
# Pastikan kolom sebagai faktor (untuk KM)
mydata[[v]] <- as.factor(mydata[[v]])
# Model survival
fit <- survfit(mydata$s_obj ~ mydata[[v]])
# Plot KM
print(ggsurvplot(
fit,
data = mydata, # <- ganti dari data.frame(group_var)
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
ggtheme = theme_minimal(),
title = paste("Kaplan-Meier -", v),
legend.title = v,
legend.labs = levels(mydata[[v]])
))
# Uji log-rank
print(survdiff(mydata$s_obj ~ mydata[[v]], data = mydata))
# Plot log-log survival
print(ggsurvplot(
fit,
data = mydata,
fun = "cloglog",
conf.int = FALSE,
ggtheme = theme_minimal(),
title = paste("Log-Log Survival -", v),
legend.title = v,
legend.labs = levels(mydata[[v]])
))
}, error = function(e) {
cat("Error processing variable", v, ":", conditionMessage(e), "\n")
})
}##
##
## ==============================
## Variable: STATUS
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=0 290 0 71.1 71.1 414
## mydata[[v]]=1 88 88 16.9 299.0 414
##
## Chisq= 414 on 1 degrees of freedom, p= <2e-16
##
##
## ==============================
## Variable: KRT
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=bukan KRT 375 86 87.516 0.0263 5
## mydata[[v]]=KRT 3 2 0.484 4.7498 5
##
## Chisq= 5 on 1 degrees of freedom, p= 0.03
##
##
## ==============================
## Variable: WILAYAH
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=Perdesaan 63 11 15.7 1.395 1.77
## mydata[[v]]=Perkotaan 315 77 72.3 0.302 1.77
##
## Chisq= 1.8 on 1 degrees of freedom, p= 0.2
##
##
## ==============================
## Variable: KAWIN
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=Pernah Kawin 8 3 1.88 0.6738 0.719
## mydata[[v]]=Belum Pernah Kawin 370 85 86.12 0.0147 0.719
##
## Chisq= 0.7 on 1 degrees of freedom, p= 0.4
##
##
## ==============================
## Variable: DIDIK
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=SMA/Sederajat 332 71 78.78 0.769 7.69
## mydata[[v]]=D1/D2/D3 15 5 2.98 1.377 1.49
## mydata[[v]]=D4/S1 31 12 6.24 5.313 5.98
##
## Chisq= 7.8 on 2 degrees of freedom, p= 0.02
##
##
## ==============================
## Variable: JK
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=Perempuan 171 40 38.5 0.0623 0.116
## mydata[[v]]=Laki-laki 207 48 49.5 0.0484 0.116
##
## Chisq= 0.1 on 1 degrees of freedom, p= 0.7
##
##
## ==============================
## Variable: SERTIF_LATIH
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=Tidak Memiliki 298 63 69.1 0.536 2.73
## mydata[[v]]=Memiliki 80 25 18.9 1.957 2.73
##
## Chisq= 2.7 on 1 degrees of freedom, p= 0.1
##
##
## ==============================
## Variable: SERTIF_MAGANG
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=Tidak Memiliki 345 81 81.84 0.00858 0.152
## mydata[[v]]=Memiliki 33 7 6.16 0.11395 0.152
##
## Chisq= 0.2 on 1 degrees of freedom, p= 0.7
##
##
## ==============================
## Variable: MRISEN
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=Bukan Migran 374 85 87.201 0.0555 6.4
## mydata[[v]]=Migran 4 3 0.799 6.0615 6.4
##
## Chisq= 6.4 on 1 degrees of freedom, p= 0.01
##
##
## ==============================
## Variable: UMK
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=rendah 44 11 10.8 0.00487 0.00579
## mydata[[v]]=sedang 117 25 23.9 0.04682 0.06759
## mydata[[v]]=tinggi 217 52 53.3 0.03112 0.08319
##
## Chisq= 0.1 on 2 degrees of freedom, p= 1
#library(survival)
library(survminer)
# Tambahkan UMUR dan JUMUR ke list variabel
variabel_list <- c("UMUR", "JUMUR", "KRT", "DIDIK", "MRISEN")
# Ubah jadi faktor sebelum loop
mydata$UMUR <- as.factor(mydata$UMUR)
# Loop eksplorasi Kaplan-Meier dan log-rank
for (v in variabel_list) {
cat("\n\n==============================\n")
cat("Variable:", v, "\n")
tryCatch({
# Pastikan faktor
mydata[[v]] <- as.factor(mydata[[v]])
# Survival fit
fit <- survfit(mydata$s_obj ~ mydata[[v]])
# Plot KM
print(ggsurvplot(
fit,
data = mydata,
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
ggtheme = theme_minimal(),
title = paste("Kaplan-Meier -", v),
legend.title = v,
legend.labs = levels(mydata[[v]])
))
# Log-rank test
print(survdiff(mydata$s_obj ~ mydata[[v]], data = mydata))
# Plot log-log survival
print(ggsurvplot(
fit,
data = mydata,
fun = "cloglog",
conf.int = FALSE,
ggtheme = theme_minimal(),
title = paste("Log-Log Survival -", v),
legend.title = v,
legend.labs = levels(mydata[[v]])
))
}, error = function(e) {
cat("Error processing variable", v, ":", conditionMessage(e), "\n")
})
}##
##
## ==============================
## Variable: UMUR
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=17 15 2 1.35 0.31839 0.34902
## mydata[[v]]=18 72 13 9.93 0.95001 1.15221
## mydata[[v]]=19 73 21 17.43 0.73210 0.95748
## mydata[[v]]=20 57 16 15.25 0.03679 0.04645
## mydata[[v]]=21 46 12 12.55 0.02375 0.02888
## mydata[[v]]=22 42 6 11.99 2.99508 3.62625
## mydata[[v]]=23 41 9 10.70 0.26928 0.31960
## mydata[[v]]=24 32 9 8.81 0.00409 0.00527
##
## Chisq= 5.7 on 7 degrees of freedom, p= 0.6
##
##
## ==============================
## Variable: JUMUR
## Error processing variable JUMUR : Assigned data `as.factor(mydata[[v]])` must be compatible with existing
## data.
## ✖ Existing data has 378 rows.
## ✖ Assigned data has 0 rows.
## ℹ Only vectors of size 1 are recycled.
## Caused by error in `vectbl_recycle_rhs_rows()`:
## ! Can't recycle input of size 0 to size 378.
##
##
## ==============================
## Variable: KRT
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=bukan KRT 375 86 87.516 0.0263 5
## mydata[[v]]=KRT 3 2 0.484 4.7498 5
##
## Chisq= 5 on 1 degrees of freedom, p= 0.03
##
##
## ==============================
## Variable: DIDIK
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=SMA/Sederajat 332 71 78.78 0.769 7.69
## mydata[[v]]=D1/D2/D3 15 5 2.98 1.377 1.49
## mydata[[v]]=D4/S1 31 12 6.24 5.313 5.98
##
## Chisq= 7.8 on 2 degrees of freedom, p= 0.02
##
##
## ==============================
## Variable: MRISEN
## Call:
## survdiff(formula = mydata$s_obj ~ mydata[[v]], data = mydata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mydata[[v]]=Bukan Migran 374 85 87.201 0.0555 6.4
## mydata[[v]]=Migran 4 3 0.799 6.0615 6.4
##
## Chisq= 6.4 on 1 degrees of freedom, p= 0.01
Model Cox PH
mydata$UMUR<-as.numeric(mydata$UMUR)
cox_model <-coxph(s_obj ~ KRT + DIDIK + MRISEN, data = mydata)
summary(cox_model)## Call:
## coxph(formula = s_obj ~ KRT + DIDIK + MRISEN, data = mydata)
##
## n= 378, number of events= 88
##
## coef exp(coef) se(coef) z Pr(>|z|)
## KRTKRT 1.6574 5.2459 0.7223 2.295 0.0217 *
## DIDIKD1/D2/D3 0.4638 1.5901 0.4930 0.941 0.3468
## DIDIKD4/S1 0.7751 2.1708 0.3152 2.459 0.0139 *
## MRISENMigran 1.1948 3.3028 0.6263 1.908 0.0564 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## KRTKRT 5.246 0.1906 1.2735 21.609
## DIDIKD1/D2/D3 1.590 0.6289 0.6050 4.179
## DIDIKD4/S1 2.171 0.4607 1.1704 4.026
## MRISENMigran 3.303 0.3028 0.9677 11.272
##
## Concordance= 0.552 (se = 0.022 )
## Likelihood ratio test= 12.39 on 4 df, p=0.01
## Wald test = 16.52 on 4 df, p=0.002
## Score (logrank) test = 18.95 on 4 df, p=8e-04
## chisq df p
## KRT 1.89 1 0.170
## DIDIK 5.64 2 0.060
## MRISEN 2.14 1 0.143
## GLOBAL 9.81 4 0.044
Model Stratified Cox
## Call:
## coxph(formula = s_obj ~ KRT + DIDIK + MRISEN + strata(UMUR),
## data = mydata)
##
## n= 378, number of events= 88
##
## coef exp(coef) se(coef) z Pr(>|z|)
## KRTKRT 1.8044 6.0761 0.7489 2.409 0.0160 *
## DIDIKD1/D2/D3 1.3817 3.9816 0.5695 2.426 0.0153 *
## DIDIKD4/S1 1.8783 6.5426 0.4689 4.006 6.18e-05 ***
## MRISENMigran 1.3120 3.7136 0.6760 1.941 0.0523 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## KRTKRT 6.076 0.1646 1.4001 26.37
## DIDIKD1/D2/D3 3.982 0.2512 1.3041 12.16
## DIDIKD4/S1 6.543 0.1528 2.6100 16.40
## MRISENMigran 3.714 0.2693 0.9871 13.97
##
## Concordance= 0.565 (se = 0.018 )
## Likelihood ratio test= 25.03 on 4 df, p=5e-05
## Wald test = 26.23 on 4 df, p=3e-05
## Score (logrank) test = 33.32 on 4 df, p=1e-06
## chisq df p
## KRT 0.423 1 0.52
## DIDIK 1.595 2 0.45
## MRISEN 0.459 1 0.50
## GLOBAL 2.841 4 0.58
## [1] 599.8247
## chisq df p
## KRT 1.89 1 0.170
## DIDIK 5.64 2 0.060
## MRISEN 2.14 1 0.143
## GLOBAL 9.81 4 0.044
Parametrik
Model Null
# Model Exponential
exp_model <- survreg(s_obj ~ 1, data = mydata, dist = "exponential")
# Model Weibull
weib_model <- survreg(s_obj ~ 1, data = mydata, dist = "weibull")
# Model Log-normal
lnorm_model <- survreg(s_obj ~ 1, data = mydata, dist = "lognormal")
# Model Log-logistic
llogis_model <- survreg(s_obj ~ 1, data = mydata, dist = "loglogistic")AIC null
# Buat tabel AIC awal dari model parametrik
aic_table <- data.frame(
Distribusi = c("exponential", "weibull", "lognormal", "loglogistic"),
AIC = c(
AIC(exp_model),
AIC(weib_model),
AIC(lnorm_model),
AIC(llogis_model)
)
)
# Tambahkan AIC dari model Cox
cox_row <- data.frame(
Distribusi = "coxph",
AIC = AIC(cox_model)
)
# Gabungkan semua ke dalam satu tabel
aic_table <- rbind(aic_table, cox_row)
# Urutkan berdasarkan AIC terkecil
aic_table <- aic_table[order(aic_table$AIC), ]
# Tampilkan tabel akhir
print(aic_table)## Distribusi AIC
## 3 lognormal 805.1600
## 4 loglogistic 816.3117
## 2 weibull 823.2771
## 1 exponential 832.9493
## 5 coxph 938.4964
# Model survival
fit_llogis <- flexsurvreg(s_obj ~ 1, data = mydata, dist = "llogis")
fit_lnorm <- flexsurvreg(s_obj ~ 1, data = mydata, dist = "lognormal")
# Plot survival functions
plot(fit_llogis,
col = "red",
lwd = 2,
lwd.ci = 0,
xlab = "Time",
ylab = "Survival Probability",
main = "Survival Function: Loglogistic vs Lognormal")
# Tambahkan model lognormal ke plot
plot(fit_lnorm,
col = "blue",
lwd = 2,
lwd.ci = 0,
add = TRUE)
# Tambahkan legenda
legend("bottomleft",
legend = c("Loglogistic", "Lognormal"),
col = c("red", "blue"),
lty = 1,
lwd = 1)Model LogNormal
library(flexsurv)
model_terbaik1 <- flexsurvreg(s_obj ~UMUR+KRT + DIDIK + MRISEN, data = mydata, dist = "lognormal")
(model_terbaik1)## Call:
## flexsurvreg(formula = s_obj ~ UMUR + KRT + DIDIK + MRISEN, data = mydata,
## dist = "lognormal")
##
## Estimates:
## data mean est L95% U95% se exp(est)
## meanlog NA 2.27312 1.58250 2.96375 0.35237 NA
## sdlog NA 1.77995 1.51929 2.08534 0.14380 NA
## UMUR 4.31481 0.33197 0.18019 0.48374 0.07744 1.39371
## KRTKRT 0.00794 -2.18333 -4.45024 0.08358 1.15661 0.11267
## DIDIKD1/D2/D3 0.03968 -1.44553 -2.68346 -0.20759 0.63161 0.23562
## DIDIKD4/S1 0.08201 -1.75047 -2.70480 -0.79614 0.48691 0.17369
## MRISENMigran 0.01058 -1.63104 -3.51434 0.25226 0.96089 0.19573
## L95% U95%
## meanlog NA NA
## sdlog NA NA
## UMUR 1.19745 1.62213
## KRTKRT 0.01168 1.08717
## DIDIKD1/D2/D3 0.06833 0.81254
## DIDIKD4/S1 0.06688 0.45106
## MRISENMigran 0.02977 1.28693
##
## N = 378, Events: 88, Censored: 290
## Total time at risk: 3636
## Log-likelihood = -387.1856, df = 7
## AIC = 788.3712
## [1] 788.3712
Model Loglogistic
library(flexsurv)
model_terbaik2 <- flexsurvreg(s_obj ~UMUR+KRT + DIDIK + MRISEN, data = mydata, dist = "llogis")
(model_terbaik2)## Call:
## flexsurvreg(formula = s_obj ~ UMUR + KRT + DIDIK + MRISEN, data = mydata,
## dist = "llogis")
##
## Estimates:
## data mean est L95% U95% se exp(est)
## shape NA 0.99422 0.83987 1.17693 0.08558 NA
## scale NA 8.63516 4.25519 17.52354 3.11798 NA
## UMUR 4.31481 0.34966 0.18854 0.51078 0.08221 1.41858
## KRTKRT 0.00794 -2.24436 -4.21444 -0.27429 1.00516 0.10600
## DIDIKD1/D2/D3 0.03968 -1.43872 -2.69014 -0.18730 0.63849 0.23723
## DIDIKD4/S1 0.08201 -1.94019 -2.85955 -1.02082 0.46907 0.14368
## MRISENMigran 0.01058 -1.61929 -3.29242 0.05383 0.85365 0.19804
## L95% U95%
## shape NA NA
## scale NA NA
## UMUR 1.20748 1.66658
## KRTKRT 0.01478 0.76011
## DIDIKD1/D2/D3 0.06787 0.82919
## DIDIKD4/S1 0.05729 0.36030
## MRISENMigran 0.03716 1.05531
##
## N = 378, Events: 88, Censored: 290
## Total time at risk: 3636
## Log-likelihood = -391.2678, df = 7
## AIC = 796.5355
## [1] 796.5355
# Buat tabel AIC dari kedua model
aic_perbandingan <- data.frame(
Model = c("Lognormal", "Loglogistic","Cox PH (Tanpa Umur)","Stratified Cox"),
AIC = c(AIC(model_terbaik1), AIC(model_terbaik2),AIC(cox_model),AIC(str_model))
)
# Urutkan dari AIC terkecil
aic_perbandingan <- aic_perbandingan[order(aic_perbandingan$AIC), ]
# Tampilkan tabel
print(aic_perbandingan)## Model AIC
## 4 Stratified Cox 599.8247
## 1 Lognormal 788.3712
## 2 Loglogistic 796.5355
## 3 Cox PH (Tanpa Umur) 938.4964
plot(model_terbaik2,
type = "survival",
col = "blue",
lwd = 2,
xlab = "Time",
ylab = "Survival Probability",
main = "Survival Curve - Loglogistik AFT Model")plot(model_terbaik1,
type = "survival",
col = "blue",
lwd = 2,
xlab = "Time",
ylab = "Survival Probability",
main = "Survival Curve - Lognormal AFT Model")## Call:
## coxph(formula = s_obj ~ KRT + DIDIK + MRISEN + strata(UMUR),
## data = mydata)
##
## n= 378, number of events= 88
##
## coef exp(coef) se(coef) z Pr(>|z|)
## KRTKRT 1.8044 6.0761 0.7489 2.409 0.0160 *
## DIDIKD1/D2/D3 1.3817 3.9816 0.5695 2.426 0.0153 *
## DIDIKD4/S1 1.8783 6.5426 0.4689 4.006 6.18e-05 ***
## MRISENMigran 1.3120 3.7136 0.6760 1.941 0.0523 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## KRTKRT 6.076 0.1646 1.4001 26.37
## DIDIKD1/D2/D3 3.982 0.2512 1.3041 12.16
## DIDIKD4/S1 6.543 0.1528 2.6100 16.40
## MRISENMigran 3.714 0.2693 0.9871 13.97
##
## Concordance= 0.565 (se = 0.018 )
## Likelihood ratio test= 25.03 on 4 df, p=5e-05
## Wald test = 26.23 on 4 df, p=3e-05
## Score (logrank) test = 33.32 on 4 df, p=1e-06