Kondisi financial distress pada perusahaan publik sektor transportasi merupakan fenomena yang dapat menimbulkan kerugian bagi investor, kreditur, maupun pemangku kepentingan lainnya. Oleh karena itu, diperlukan suatu model prediksi yang mampu mengidentifikasi kemungkinan perusahaan mengalami financial distress secara dini berdasarkan indikator keuangan yang tersedia.
Berdasarkan latar belakang tersebut, rumusan masalah dalam analisis ini adalah:
Sesuai dengan rumusan masalah di atas, tujuan analisis ini adalah:
required_packages <- c("dplyr", "ggplot2", "broom", "knitr", "scales", "readr")
missing_packages <- required_packages[
!vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
stop(
"Paket berikut belum tersedia: ",
paste(missing_packages, collapse = ", "),
". Silakan install terlebih dahulu."
)
}
invisible(lapply(required_packages, library, character.only = TRUE))Data yang digunakan adalah data Financial Distress perusahaan publik yang memuat indikator-indikator keuangan sebagai prediktor dan status financial distress sebagai variabel respon biner.
raw_data <- readr::read_csv("data projek adk bismillah-3.csv",
show_col_types = FALSE)
# Periksa dimensi awal
ringkasan_data <- data.frame(
Keterangan = c("Jumlah observasi", "Jumlah variabel"),
Nilai = c(nrow(raw_data), ncol(raw_data))
)
knitr::kable(
ringkasan_data,
caption = "Ukuran dataset Financial Distress"
)| Keterangan | Nilai |
|---|---|
| Jumlah observasi | 120 |
| Jumlah variabel | 10 |
kamus_variabel <- data.frame(
`Kode kolom` = c("X1", "X2", "X3", "X4", "X5", "Y"),
`Nama variabel analisis` = c(
"current_ratio", "debt_ratio", "roa",
"sales_growth", "firm_size", "financial_distress"
),
`Keterangan` = c(
"Current Ratio — rasio aset lancar terhadap kewajiban lancar",
"Debt Ratio — rasio total utang terhadap total aset",
"Return on Assets — laba bersih dibagi total aset",
"Sales Growth — pertumbuhan penjualan tahunan",
"Firm Size — logaritma natural total aset",
"Status financial distress: 0 = Non-distress, 1 = Distress"
),
`Tipe dalam analisis` = c(
"Numerik", "Numerik", "Numerik",
"Numerik", "Numerik", "Respon biner"
),
check.names = FALSE
)
knitr::kable(
kamus_variabel,
caption = "Kamus variabel dataset Financial Distress"
)| Kode kolom | Nama variabel analisis | Keterangan | Tipe dalam analisis |
|---|---|---|---|
| X1 | current_ratio | Current Ratio — rasio aset lancar terhadap kewajiban lancar | Numerik |
| X2 | debt_ratio | Debt Ratio — rasio total utang terhadap total aset | Numerik |
| X3 | roa | Return on Assets — laba bersih dibagi total aset | Numerik |
| X4 | sales_growth | Sales Growth — pertumbuhan penjualan tahunan | Numerik |
| X5 | firm_size | Firm Size — logaritma natural total aset | Numerik |
| Y | financial_distress | Status financial distress: 0 = Non-distress, 1 = Distress | Respon biner |
fd <- raw_data %>%
transmute(
kode_saham = `Kode Saham`,
nama_perusahaan = `Nama Perusahaan`,
tahun = Tahun,
current_ratio = `Current Ratio\n(X1)`,
debt_ratio = `Debt Ratio\n(X2)`,
roa = `ROA\n(X3)`,
sales_growth = `Sales Growth\n(X4)`,
firm_size = `Firm Size\n(X5)\n=LN(Total Aset)`,
financial_distress = factor(
`Financial Distress\n(Y)\n0=Non | 1=Distress`,
levels = c(0, 1),
labels = c("Non-distress", "Distress")
),
distress_int = as.integer(
`Financial Distress\n(Y)\n0=Non | 1=Distress`
)
) %>%
na.omit()contoh_data <- fd %>%
transmute(
`Kode Saham` = kode_saham,
`Tahun` = tahun,
`Current Ratio` = current_ratio,
`Debt Ratio` = debt_ratio,
`ROA` = roa,
`Sales Growth` = sales_growth,
`Firm Size` = round(firm_size, 2),
`Status` = financial_distress
) %>%
head(8)
knitr::kable(
contoh_data,
caption = "Contoh delapan baris data setelah pembersihan"
)| Kode Saham | Tahun | Current Ratio | Debt Ratio | ROA | Sales Growth | Firm Size | Status |
|---|---|---|---|---|---|---|---|
| CMPP | 2019 | 0.4713503 | 0.9226942 | -0.0600842 | 0.5849279 | 7.87 | Distress |
| CMPP | 2020 | 0.0349001 | 1.4785397 | -0.4530505 | -0.7598748 | 8.71 | Distress |
| CMPP | 2021 | 0.0251439 | 2.0108759 | -0.4540688 | -0.6114215 | 8.55 | Distress |
| CMPP | 2022 | 0.0384977 | 2.2721673 | -0.3074482 | 5.0399361 | 8.59 | Distress |
| CMPP | 2023 | 0.0387037 | 1.4447351 | -0.1767495 | 0.7521820 | 8.72 | Distress |
| CMPP | 2024 | 0.0474417 | 1.6405457 | -0.2670981 | 0.1990943 | 8.65 | Distress |
| GIAA | 2019 | 0.3480469 | 0.8382738 | 0.0014692 | 0.0380320 | 11.03 | Non-distress |
| GIAA | 2020 | 0.1249298 | 1.1800740 | -0.2372054 | -0.6637139 | 11.93 | Non-distress |
class_summary <- fd %>%
count(financial_distress, name = "Jumlah") %>%
mutate(Proporsi = scales::percent(Jumlah / sum(Jumlah), accuracy = 0.1)) %>%
rename(`Status Financial Distress` = financial_distress)
knitr::kable(
class_summary,
caption = "Distribusi kelas respon Financial Distress"
)| Status Financial Distress | Jumlah | Proporsi |
|---|---|---|
| Non-distress | 91 | 75.8% |
| Distress | 29 | 24.2% |
ggplot(fd, aes(x = financial_distress, fill = financial_distress)) +
geom_bar(width = 0.55, color = "white", linewidth = 0.8) +
geom_text(
stat = "count",
aes(label = after_stat(count)),
vjust = -0.4,
fontface = "bold"
) +
scale_fill_manual(values = c("Non-distress" = "#2a9d8f", "Distress" = "#e76f51")) +
labs(
title = "Distribusi Status Financial Distress",
x = NULL,
y = "Jumlah perusahaan-tahun"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "none")ggplot(fd, aes(x = debt_ratio, y = roa, color = financial_distress)) +
geom_point(alpha = 0.72, size = 2.1) +
scale_color_manual(values = c("Non-distress" = "#2a9d8f", "Distress" = "#e76f51")) +
labs(
title = "Debt Ratio vs. Return on Assets",
subtitle = "Titik oranye menunjukkan perusahaan dalam kondisi financial distress",
x = "Debt Ratio (X2)",
y = "Return on Assets (X3)",
color = "Status"
) +
theme_minimal(base_size = 12)set.seed(42)
stratified_split <- function(y, prop = 0.8) {
idx_by_class <- split(seq_along(y), y)
train_idx <- lapply(
idx_by_class,
function(idx) sample(idx, size = floor(length(idx) * prop))
)
unlist(train_idx, use.names = FALSE)
}
train_id <- stratified_split(fd$distress_int, prop = 0.8)
train_data <- fd[ train_id, ]
test_data <- fd[-train_id, ]
split_summary <- bind_rows(
train_data %>% count(financial_distress) %>% mutate(data = "Training"),
test_data %>% count(financial_distress) %>% mutate(data = "Testing")
) %>%
group_by(data) %>%
mutate(proporsi = scales::percent(n / sum(n), accuracy = 0.1)) %>%
ungroup() %>%
select(data, financial_distress, n, proporsi) %>%
rename(
Data = data,
`Status` = financial_distress,
Jumlah = n,
Proporsi = proporsi
)
knitr::kable(split_summary, caption = "Distribusi kelas pada training dan testing")| Data | Status | Jumlah | Proporsi |
|---|---|---|---|
| Training | Non-distress | 72 | 75.8% |
| Training | Distress | 23 | 24.2% |
| Testing | Non-distress | 19 | 76.0% |
| Testing | Distress | 6 | 24.0% |
fd_fit <- glm(
distress_int ~ current_ratio + debt_ratio + roa + sales_growth + firm_size,
data = train_data,
family = binomial(link = "logit")
)
ringkasan_model <- data.frame(
Keterangan = c(
"Jumlah observasi training",
"Null deviance",
"Residual deviance",
"Derajat bebas residual",
"AIC"
),
Nilai = c(
nobs(fd_fit),
round(fd_fit$null.deviance, 3),
round(fd_fit$deviance, 3),
fd_fit$df.residual,
round(AIC(fd_fit), 3)
)
)
knitr::kable(
ringkasan_model,
caption = "Ringkasan kecocokan model regresi logistik biner"
)| Keterangan | Nilai |
|---|---|
| Jumlah observasi training | 95.000 |
| Null deviance | 105.164 |
| Residual deviance | 66.847 |
| Derajat bebas residual | 89.000 |
| AIC | 78.847 |
coef_table <- broom::tidy(fd_fit) %>%
mutate(
odds_ratio = exp(estimate),
ci_low = exp(estimate - 1.96 * std.error),
ci_high = exp(estimate + 1.96 * std.error)
) %>%
arrange(p.value) %>%
transmute(
`Variabel` = term,
`Koefisien (β)` = round(estimate, 4),
`Odds Ratio` = round(odds_ratio, 4),
`Selang Kepercayaan 95%` = paste0(round(ci_low, 3), " – ", round(ci_high, 3)),
`p-value` = signif(p.value, 3),
`Signifikan (α = 0.05)` = ifelse(p.value < 0.05, "Ya ✓", "Tidak")
)
knitr::kable(
coef_table,
caption = "Estimasi koefisien, odds ratio, dan uji signifikansi"
)| Variabel | Koefisien (β) | Odds Ratio | Selang Kepercayaan 95% | p-value | Signifikan (α = 0.05) |
|---|---|---|---|---|---|
| roa | -16.0189 | 0.0000 | 0 – 0.001 | 0.000289 | Ya ✓ |
| sales_growth | 1.0455 | 2.8449 | 0.484 – 16.715 | 0.247000 | Tidak |
| current_ratio | 0.0462 | 1.0473 | 0.949 – 1.156 | 0.359000 | Tidak |
| firm_size | -0.1590 | 0.8530 | 0.607 – 1.199 | 0.361000 | Tidak |
| (Intercept) | -0.6364 | 0.5292 | 0.05 – 5.552 | 0.596000 | Tidak |
| debt_ratio | -0.2399 | 0.7867 | 0.212 – 2.914 | 0.720000 | Tidak |
p_train <- predict(fd_fit, newdata = train_data, type = "response")
p_test <- predict(fd_fit, newdata = test_data, type = "response")
prediction_preview <- head(
data.frame(
`Kode Saham` = test_data$kode_saham,
`Status Aktual` = test_data$financial_distress,
`Peluang Prediksi Distress` = round(p_test, 4),
check.names = FALSE
),
6
)
knitr::kable(
prediction_preview,
caption = "Contoh peluang prediksi financial distress pada data testing"
)| Kode Saham | Status Aktual | Peluang Prediksi Distress |
|---|---|---|
| CMPP | Distress | 0.9835 |
| GIAA | Non-distress | 0.0718 |
| HELI | Non-distress | 0.0716 |
| ASSA | Non-distress | 0.1161 |
| BIRD | Non-distress | 0.1012 |
| IMJS | Non-distress | 0.0929 |
safe_div <- function(num, den) {
ifelse(den == 0, NA_real_, num / den)
}
classification_metrics <- function(actual, prob, threshold = 0.5) {
pred <- as.integer(prob >= threshold)
tp <- sum(pred == 1 & actual == 1)
tn <- sum(pred == 0 & actual == 0)
fp <- sum(pred == 1 & actual == 0)
fn <- sum(pred == 0 & actual == 1)
sensitivity <- safe_div(tp, tp + fn)
specificity <- safe_div(tn, tn + fp)
precision <- safe_div(tp, tp + fp)
npv <- safe_div(tn, tn + fn)
accuracy <- safe_div(tp + tn, tp + tn + fp + fn)
data.frame(
threshold = threshold,
accuracy = accuracy,
error_rate = 1 - accuracy,
sensitivity = sensitivity,
specificity = specificity,
precision = precision,
negative_predictive_value = npv,
f1_score = safe_div(2 * precision * sensitivity, precision + sensitivity),
balanced_accuracy = (sensitivity + specificity) / 2,
false_positive_rate = 1 - specificity,
false_negative_rate = 1 - sensitivity
)
}
format_metrics_indonesia <- function(x) {
x %>%
transmute(
Threshold = threshold,
Akurasi = accuracy,
`Error rate` = error_rate,
Sensitivity = sensitivity,
Specificity = specificity,
Presisi = precision,
NPV = negative_predictive_value,
`F1-score` = f1_score,
`Balanced accuracy` = balanced_accuracy,
FPR = false_positive_rate,
FNR = false_negative_rate
)
}
confusion_matrix <- function(actual, prob, threshold = 0.5) {
pred_label <- factor(
ifelse(prob >= threshold, "Prediksi Distress", "Prediksi Non-distress"),
levels = c("Prediksi Distress", "Prediksi Non-distress")
)
actual_label <- factor(
ifelse(actual == 1, "Aktual Distress", "Aktual Non-distress"),
levels = c("Aktual Distress", "Aktual Non-distress")
)
addmargins(table(actual_label, pred_label))
}confusion_default <- confusion_matrix(test_data$distress_int, p_test, threshold = 0.5)
metrics_default <- classification_metrics(test_data$distress_int, p_test, threshold = 0.5) %>%
format_metrics_indonesia() %>%
mutate(across(where(is.numeric), round, 3))
knitr::kable(
confusion_default,
caption = "Confusion matrix testing pada threshold 0.50"
)| Prediksi Distress | Prediksi Non-distress | Sum | |
|---|---|---|---|
| Aktual Distress | 3 | 3 | 6 |
| Aktual Non-distress | 1 | 18 | 19 |
| Sum | 4 | 21 | 25 |
| Threshold | Akurasi | Error rate | Sensitivity | Specificity | Presisi | NPV | F1-score | Balanced accuracy | FPR | FNR |
|---|---|---|---|---|---|---|---|---|---|---|
| 0.5 | 0.84 | 0.16 | 0.5 | 0.947 | 0.75 | 0.857 | 0.6 | 0.724 | 0.053 | 0.5 |
roc_points <- function(actual, prob) {
thresholds <- c(Inf, sort(unique(prob), decreasing = TRUE), -Inf)
out <- lapply(thresholds, function(th) {
pred <- as.integer(prob >= th)
tp <- sum(pred == 1 & actual == 1)
tn <- sum(pred == 0 & actual == 0)
fp <- sum(pred == 1 & actual == 0)
fn <- sum(pred == 0 & actual == 1)
sensitivity <- safe_div(tp, tp + fn)
specificity <- safe_div(tn, tn + fp)
data.frame(
threshold = th,
sensitivity = sensitivity,
specificity = specificity,
fpr = 1 - specificity,
youden = sensitivity + specificity - 1
)
})
bind_rows(out)
}
auc_value <- function(roc_df) {
roc_sorted <- roc_df %>% arrange(fpr, sensitivity)
sum(
diff(roc_sorted$fpr) *
(head(roc_sorted$sensitivity, -1) + tail(roc_sorted$sensitivity, -1)) / 2
)
}
roc_train <- roc_points(train_data$distress_int, p_train) %>% mutate(data = "Training")
roc_test <- roc_points(test_data$distress_int, p_test) %>% mutate(data = "Testing")
auc_train <- auc_value(roc_train)
auc_test <- auc_value(roc_test)
optimal_train <- roc_train %>%
filter(is.finite(threshold)) %>%
arrange(desc(youden), desc(sensitivity)) %>%
slice(1)
threshold_opt <- optimal_train$threshold[1]
test_at_opt <- roc_points(test_data$distress_int, p_test) %>%
filter(is.finite(threshold)) %>%
slice_min(abs(threshold - threshold_opt), n = 1, with_ties = FALSE) %>%
mutate(data = "Testing pada threshold optimal")auc_table <- data.frame(
Data = c("Training", "Testing"),
AUC = round(c(auc_train, auc_test), 3)
)
threshold_table <- optimal_train %>%
transmute(
`Threshold optimal` = round(threshold, 3),
Sensitivity = round(sensitivity, 3),
Specificity = round(specificity, 3),
`Indeks Youden` = round(youden, 3)
)
knitr::kable(auc_table, caption = "Nilai AUC pada data training dan testing")| Data | AUC |
|---|---|
| Training | 0.912 |
| Testing | 0.939 |
knitr::kable(threshold_table, caption = "Threshold optimal berdasarkan indeks Youden (ROC training)")| Threshold optimal | Sensitivity | Specificity | Indeks Youden |
|---|---|---|---|
| 0.266 | 0.87 | 0.903 | 0.772 |
roc_plot <- bind_rows(roc_train, roc_test)
ggplot(roc_plot, aes(x = fpr, y = sensitivity, color = data)) +
geom_path(linewidth = 1.1) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "#6c757d") +
geom_point(
data = optimal_train,
aes(x = fpr, y = sensitivity),
inherit.aes = FALSE,
color = "#ffb703",
fill = "#fb8500",
shape = 21,
size = 4,
stroke = 1.2
) +
geom_point(
data = test_at_opt,
aes(x = fpr, y = sensitivity),
inherit.aes = FALSE,
color = "#8338ec",
fill = "#3a86ff",
shape = 24,
size = 4,
stroke = 1.2
) +
coord_equal() +
scale_color_manual(values = c("Training" = "#0077b6", "Testing" = "#e76f51")) +
labs(
title = "Kurva ROC Model Regresi Logistik Biner",
subtitle = paste0(
"AUC training = ", round(auc_train, 3),
"; AUC testing = ", round(auc_test, 3),
"; threshold optimal = ", round(threshold_opt, 3)
),
x = "False Positive Rate (1 – Specificity)",
y = "Sensitivity / True Positive Rate",
color = "Data"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")metrics_compare <- bind_rows(
classification_metrics(test_data$distress_int, p_test, threshold = 0.5) %>%
mutate(aturan = "Threshold 0.50"),
classification_metrics(test_data$distress_int, p_test, threshold = threshold_opt) %>%
mutate(aturan = "Threshold optimal ROC")
) %>%
select(aturan, everything()) %>%
format_metrics_indonesia() %>%
bind_cols(
`Aturan klasifikasi` = c("Threshold 0.50", "Threshold optimal ROC"), .
) %>%
select(`Aturan klasifikasi`, everything()) %>%
mutate(across(where(is.numeric), round, 3))
confusion_opt <- confusion_matrix(test_data$distress_int, p_test, threshold = threshold_opt)
knitr::kable(
metrics_compare,
caption = "Perbandingan metrik testing pada dua aturan threshold"
)| Aturan klasifikasi | Threshold | Akurasi | Error rate | Sensitivity | Specificity | Presisi | NPV | F1-score | Balanced accuracy | FPR | FNR |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Threshold 0.50 | 0.500 | 0.84 | 0.16 | 0.500 | 0.947 | 0.75 | 0.857 | 0.600 | 0.724 | 0.053 | 0.500 |
| Threshold optimal ROC | 0.266 | 0.88 | 0.12 | 0.667 | 0.947 | 0.80 | 0.900 | 0.727 | 0.807 | 0.053 | 0.333 |
| Prediksi Distress | Prediksi Non-distress | Sum | |
|---|---|---|---|
| Aktual Distress | 4 | 2 | 6 |
| Aktual Non-distress | 1 | 18 | 19 |
| Sum | 5 | 20 | 25 |
test_prob_plot <- test_data %>%
mutate(peluang_distress = p_test)
ggplot(test_prob_plot, aes(x = peluang_distress, fill = financial_distress)) +
geom_density(alpha = 0.55, color = "white", linewidth = 0.7) +
geom_vline(
xintercept = threshold_opt,
color = "#fb8500",
linewidth = 1.2,
linetype = "dashed"
) +
annotate(
"label",
x = threshold_opt,
y = Inf,
label = paste0("threshold = ", round(threshold_opt, 3)),
vjust = 1.4,
fill = "#fff3b0",
color = "#5f370e",
label.size = 0
) +
scale_fill_manual(values = c("Non-distress" = "#2a9d8f", "Distress" = "#e76f51")) +
labs(
title = "Distribusi Peluang Prediksi Financial Distress pada Data Testing",
x = "Peluang prediksi distress",
y = "Kepadatan",
fill = "Status aktual"
) +
theme_minimal(base_size = 12)## Model regresi logistik biner diestimasi menggunakan 95 observasi training dari total 120 observasi.
##
## Nilai AUC pada data testing sebesar 0.939 menunjukkan kemampuan diskriminasi model terhadap perusahaan dalam kondisi financial distress dan non-distress.
##
## Threshold optimal berdasarkan indeks Youden pada kurva ROC training adalah 0.266.
Berdasarkan hasil analisis:
Laporan dibuat otomatis menggunakan R Markdown.