# 1. LOAD LIBRARY
library(MASS)
library(brant)
library(dplyr)
library(ggplot2)
library(caret)
library(DT)
library(knitr)
library(kableExtra)
library(corrplot)
library(car)
# 2. DATA PREPARATION & CLEANING
# Load the raw dataset; character columns are read in as factors.
df <- read.csv("HeartDiseaseTrain-Test.csv", stringsAsFactors = TRUE)

# --- A. MISSING-VALUE CHECK ---
# Count the NA entries in every column. `row.names = NULL` discards the names
# that colSums() attaches to its result; without it they become row names and
# every variable name is printed twice in the rendered table.
missing_data <- data.frame(
  Variabel = names(df),
  Jumlah_NA = colSums(is.na(df)),
  row.names = NULL
)
kable(missing_data, caption = "<b>Tabel 1.1: Pengecekan Missing Value</b>") %>%
  kable_styling(bootstrap_options = "condensed", full_width = FALSE)
Tabel 1.1: Pengecekan Missing Value
|
|
Variabel
|
Jumlah_NA
|
|
age
|
age
|
0
|
|
sex
|
sex
|
0
|
|
chest_pain_type
|
chest_pain_type
|
0
|
|
resting_blood_pressure
|
resting_blood_pressure
|
0
|
|
cholestoral
|
cholestoral
|
0
|
|
fasting_blood_sugar
|
fasting_blood_sugar
|
0
|
|
rest_ecg
|
rest_ecg
|
0
|
|
Max_heart_rate
|
Max_heart_rate
|
0
|
|
exercise_induced_angina
|
exercise_induced_angina
|
0
|
|
oldpeak
|
oldpeak
|
0
|
|
slope
|
slope
|
0
|
|
vessels_colored_by_flourosopy
|
vessels_colored_by_flourosopy
|
0
|
|
thalassemia
|
thalassemia
|
0
|
|
target
|
target
|
0
|
# --- B. OUTLIER CHECK (visual) ---
# Box plots for two of the numeric variables: age and Max_heart_rate.
# par() returns the settings it replaces, so the previous layout is saved and
# restored afterwards instead of assuming the device started as 1x1.
old_par <- par(mfrow = c(1, 2)) # split the device into two columns
boxplot(df$age, main = "Outlier: Age", col = "lightblue")
boxplot(
  df$Max_heart_rate,
  main = "Outlier: Max Heart Rate",
  col = "lightcoral"
)

par(old_par) # restore the previous layout
# --- C. BUILD THE ORDINAL TARGET ---
# Derive a three-level ordered outcome from the binary `target` column and the
# `vessels_colored_by_flourosopy` factor:
#   target == 0                      -> "Sehat"
#   target == 1 and vessels == Zero  -> "Risiko Rendah"
#   target == 1 and vessels != Zero  -> "Risiko Tinggi"
# Rows matching none of the conditions are left as NA by case_when().
# The dataset is read once, earlier in the script; this step used to be
# repeated verbatim (including a second read.csv) and is now done only once.
#
# NOTE(review): `vessels_colored_by_flourosopy` is used to define the outcome
# and also remains in df_final, so a `target_ordinal ~ .` model receives a
# predictor that partly encodes the response. Confirm this is intended.
df_model <- df %>%
  mutate(
    target_ordinal = case_when(
      target == 0 ~ "Sehat",
      target == 1 & vessels_colored_by_flourosopy == "Zero" ~ "Risiko Rendah",
      target == 1 & vessels_colored_by_flourosopy != "Zero" ~ "Risiko Tinggi"
    ),
    target_ordinal = factor(
      target_ordinal,
      levels = c("Sehat", "Risiko Rendah", "Risiko Tinggi"),
      ordered = TRUE
    )
  )

# Drop the original binary target so it cannot be used as a predictor.
df_final <- df_model %>% select(-target)
# 3. DATA TABLE (interactive title)
# Show the first 50 rows of the modelling data as an interactive table,
# five rows per page, with a styled HTML caption placed above the table.
df_final %>%
  head(n = 50) %>%
  datatable(
    caption = htmltools::tags$caption(
      style = 'caption-side: top; text-align: left; color: black; font-weight: bold; font-size: 18px;',
      'Tabel 1: Preview Dataset Ordinal'
    ),
    options = list(pageLength = 5)
  )
# 4. HEATMAP (title set inside the plotting call)
# Correlation matrix (cor() defaults) of every numeric column in the raw data.
# select(where(...)) replaces the superseded select_if().
# NOTE(review): `df` still contains `target`; if it is stored as a number it is
# part of this matrix as well.
num_data <- df %>% select(where(is.numeric))
res_cor <- cor(num_data)

# Upper-triangle colour map with the coefficients printed in each cell.
# The leading "\n\n" in the title and the enlarged top margin keep the title
# from being clipped.
corrplot(
  res_cor,
  method = "color",
  type = "upper",
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  number.cex = 0.7,
  col = colorRampPalette(
    c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
  )(200),
  title = "\n\nGrafik 1: Heatmap Korelasi Antar Variabel Numerik",
  mar = c(0, 0, 3, 0)
)

# 5. VIF TEST (title via kable caption)
# An auxiliary lm() is fitted on the integer level codes of the ordered
# outcome, against every other column of df_final, so that vif() can be
# applied to the predictor set.
vif_model <- lm(as.numeric(target_ordinal) ~ ., data = df_final)
vif_values <- vif(vif_model)

# Render the VIF result as a styled HTML table.
kable(
  as.data.frame(vif_values),
  caption = "<b style='color:black; font-size:16px;'>Tabel 2: Hasil Pengujian Multikolinearitas (VIF)</b>",
  escape = FALSE
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Tabel 2: Hasil Pengujian
Multikolinearitas (VIF)
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
age
|
1.551126
|
1
|
1.245442
|
|
sex
|
1.355486
|
1
|
1.164253
|
|
chest_pain_type
|
1.806342
|
3
|
1.103570
|
|
resting_blood_pressure
|
1.232774
|
1
|
1.110303
|
|
cholestoral
|
1.170485
|
1
|
1.081889
|
|
fasting_blood_sugar
|
1.138739
|
1
|
1.067117
|
|
rest_ecg
|
1.217927
|
2
|
1.050523
|
|
Max_heart_rate
|
1.771458
|
1
|
1.330961
|
|
exercise_induced_angina
|
1.517459
|
1
|
1.231852
|
|
oldpeak
|
1.937646
|
1
|
1.391993
|
|
slope
|
1.971660
|
2
|
1.184972
|
|
vessels_colored_by_flourosopy
|
1.790937
|
4
|
1.075561
|
|
thalassemia
|
1.692594
|
3
|
1.091672
|
# 6. REGRESSION (title via kable caption)
# Fit the ordinal model (polr) on all predictors, then let step() search in
# both directions starting from that full model. Hess = TRUE keeps the Hessian
# so summary() can report standard errors.
model_awal <- polr(target_ordinal ~ ., data = df_final, Hess = TRUE)
model_best <- step(model_awal, direction = "both", trace = 0)

# Coefficient table plus two-sided p-values from the normal approximation of
# the reported t values. The p-values are rounded for display only.
ctable <- coef(summary(model_best))
p_val <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
hasil_stat <- cbind(ctable, "p_value" = round(p_val, 5))

# Rows to highlight are chosen from the unrounded p-values: testing the
# rounded display column would miss a value that is just below 0.05 but
# rounds up to 0.05.
baris_signifikan <- unname(which(p_val < 0.05))

kable(
  hasil_stat,
  caption = "<b style='color:black; font-size:16px;'>Tabel 3: Estimasi Parameter Model dan Signifikansi (P-Value)</b>",
  escape = FALSE
) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered")) %>%
  row_spec(baris_signifikan, bold = TRUE, color = "darkblue")
Tabel 3: Estimasi Parameter
Model dan Signifikansi (P-Value)
|
|
Value
|
Std. Error
|
t value
|
p_value
|
|
age
|
0.0274092
|
0.0101949
|
2.6885082
|
0.00718
|
|
sexMale
|
-1.1089123
|
0.1840103
|
-6.0263591
|
0.00000
|
|
chest_pain_typeAtypical angina
|
-1.0582080
|
0.3178129
|
-3.3296574
|
0.00087
|
|
chest_pain_typeNon-anginal pain
|
-0.4853027
|
0.2949815
|
-1.6451969
|
0.09993
|
|
chest_pain_typeTypical angina
|
-2.0715989
|
0.3036428
|
-6.8224855
|
0.00000
|
|
resting_blood_pressure
|
-0.0119050
|
0.0047933
|
-2.4836937
|
0.01300
|
|
rest_ecgNormal
|
0.7032483
|
0.8870178
|
0.7928232
|
0.42788
|
|
rest_ecgST-T wave abnormality
|
1.1087715
|
0.8892887
|
1.2468071
|
0.21247
|
|
Max_heart_rate
|
0.0132807
|
0.0045748
|
2.9030271
|
0.00370
|
|
exercise_induced_anginaYes
|
-0.5884262
|
0.2006744
|
-2.9322435
|
0.00337
|
|
oldpeak
|
-0.4104217
|
0.1011837
|
-4.0562060
|
0.00005
|
|
slopeFlat
|
-0.9230723
|
0.1912663
|
-4.8261115
|
0.00000
|
|
slopeUpsloping
|
-0.1381025
|
0.3640716
|
-0.3793279
|
0.70444
|
|
vessels_colored_by_flourosopyOne
|
-3.9665686
|
0.7437438
|
-5.3332457
|
0.00000
|
|
vessels_colored_by_flourosopyThree
|
-4.9114073
|
0.8420629
|
-5.8325893
|
0.00000
|
|
vessels_colored_by_flourosopyTwo
|
-5.1864755
|
0.8026195
|
-6.4619355
|
0.00000
|
|
vessels_colored_by_flourosopyZero
|
-4.1854375
|
0.7295177
|
-5.7372669
|
0.00000
|
|
thalassemiaNo
|
-1.2745430
|
0.9821584
|
-1.2976960
|
0.19439
|
|
thalassemiaNormal
|
-0.1657682
|
0.3517070
|
-0.4713248
|
0.63741
|
|
thalassemiaReversable Defect
|
-1.2553513
|
0.1936385
|
-6.4829623
|
0.00000
|
|
Sehat|Risiko Rendah
|
-5.0917332
|
1.6597825
|
-3.0677111
|
0.00216
|
|
Risiko Rendah|Risiko Tinggi
|
-1.6014438
|
1.6466242
|
-0.9725618
|
0.33077
|
# 7. BRANT TEST (must be rendered with kable so the table shows up)
# Run the Brant test on the stepwise-selected model and keep the result for
# the formatted table further down. The call also prints its own console
# summary, whose stated null hypothesis is that the parallel regression
# assumption holds.
# NOTE(review): in the rendered output that follows, the Omnibus row reports a
# probability of 0, i.e. the assumption is rejected for this model; confirm
# whether a model without the parallel-odds constraint is needed.
res_brant <- brant(model_best)
## --------------------------------------------------------------------
## Test for X2 df probability
## --------------------------------------------------------------------
## Omnibus 2023.53 20 0
## age 0 1 0.97
## sexMale 2.95 1 0.09
## chest_pain_typeAtypical angina 4.65 1 0.03
## chest_pain_typeNon-anginal pain 0.44 1 0.51
## chest_pain_typeTypical angina 6.2 1 0.01
## resting_blood_pressure 4.59 1 0.03
## rest_ecgNormal 0 1 1
## rest_ecgST-T wave abnormality 0 1 1
## Max_heart_rate 4.9 1 0.03
## exercise_induced_anginaYes 8.04 1 0
## oldpeak 5.16 1 0.02
## slopeFlat 0.76 1 0.38
## slopeUpsloping 5.1 1 0.02
## vessels_colored_by_flourosopyOne 1.19 1 0.28
## vessels_colored_by_flourosopyThree 0 1 0.98
## vessels_colored_by_flourosopyTwo 1.1 1 0.29
## vessels_colored_by_flourosopyZero 0 1 0.98
## thalassemiaNo 0 1 1
## thalassemiaNormal 0 1 0.99
## thalassemiaReversable Defect 0.34 1 0.56
## --------------------------------------------------------------------
##
## H0: Parallel Regression Assumption holds
# Strip the class from the Brant result and keep only the numbers, so it can
# be turned into a plain data frame for a tidy table.
brant_tabel <- as.data.frame(unclass(res_brant))

# Render it with kable so the table appears in the published (RPubs) report.
kable(
  brant_tabel,
  caption = "<b style='color:black; font-size:16px;'>Tabel 4: Hasil Uji Asumsi Parallel Odds (Brant Test)</b>",
  escape = FALSE
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "left"
  ) %>%
  row_spec(1, bold = TRUE, background = "#fdebd0") # highlight the Omnibus row
Tabel 4: Hasil Uji Asumsi
Parallel Odds (Brant Test)
|
|
X2
|
df
|
probability
|
|
Omnibus
|
2023.5337140
|
20
|
0.0000000
|
|
age
|
0.0015954
|
1
|
0.9681389
|
|
sexMale
|
2.9517585
|
1
|
0.0857842
|
|
chest_pain_typeAtypical angina
|
4.6489829
|
1
|
0.0310720
|
|
chest_pain_typeNon-anginal pain
|
0.4391236
|
1
|
0.5075458
|
|
chest_pain_typeTypical angina
|
6.1978738
|
1
|
0.0127904
|
|
resting_blood_pressure
|
4.5856680
|
1
|
0.0322404
|
|
rest_ecgNormal
|
0.0000050
|
1
|
0.9982159
|
|
rest_ecgST-T wave abnormality
|
0.0000069
|
1
|
0.9979116
|
|
Max_heart_rate
|
4.9020855
|
1
|
0.0268243
|
|
exercise_induced_anginaYes
|
8.0375075
|
1
|
0.0045819
|
|
oldpeak
|
5.1624878
|
1
|
0.0230798
|
|
slopeFlat
|
0.7567318
|
1
|
0.3843532
|
|
slopeUpsloping
|
5.0983879
|
1
|
0.0239481
|
|
vessels_colored_by_flourosopyOne
|
1.1902295
|
1
|
0.2752833
|
|
vessels_colored_by_flourosopyThree
|
0.0004677
|
1
|
0.9827455
|
|
vessels_colored_by_flourosopyTwo
|
1.1021589
|
1
|
0.2937928
|
|
vessels_colored_by_flourosopyZero
|
0.0008365
|
1
|
0.9769264
|
|
thalassemiaNo
|
0.0000001
|
1
|
0.9997668
|
|
thalassemiaNormal
|
0.0000571
|
1
|
0.9939713
|
|
thalassemiaReversable Defect
|
0.3436397
|
1
|
0.5577358
|
# 8. CONFUSION MATRIX (title via kable caption)
# Predict the outcome class for every row and cross-tabulate the predictions
# against the observed classes.
# NOTE(review): the predictions are made on df_final, the same data the model
# was fitted on, so this is an in-sample (optimistic) performance estimate.
prediksi <- predict(model_best, newdata = df_final)
conf_matrix <- confusionMatrix(
  data = prediksi,
  reference = df_final$target_ordinal
)

# Long-format table: one row per (Prediction, Reference) pair with its count.
kable(
  as.data.frame(conf_matrix$table),
  caption = "<b style='color:black; font-size:16px;'>Tabel 5: Confusion Matrix Performa Model</b>",
  escape = FALSE
) %>%
  kable_styling(bootstrap_options = "bordered", full_width = FALSE)
Tabel 5: Confusion Matrix
Performa Model
|
Prediction
|
Reference
|
Freq
|
|
Sehat
|
Sehat
|
407
|
|
Risiko Rendah
|
Sehat
|
92
|
|
Risiko Tinggi
|
Sehat
|
0
|
|
Sehat
|
Risiko Rendah
|
73
|
|
Risiko Rendah
|
Risiko Rendah
|
323
|
|
Risiko Tinggi
|
Risiko Rendah
|
19
|
|
Sehat
|
Risiko Tinggi
|
12
|
|
Risiko Rendah
|
Risiko Tinggi
|
71
|
|
Risiko Tinggi
|
Risiko Tinggi
|
28
|
# 9. FINAL ACCURACY (custom text output)
# Overall accuracy from the confusion matrix, rounded to four decimals and
# expressed as a percentage, then printed as a single line of text.
akurasi_final <- 100 * round(conf_matrix$overall["Accuracy"], digits = 4)
cat("TINGKAT AKURASI MODEL FINAL ADALAH", akurasi_final, "%", sep = " ")
## TINGKAT AKURASI MODEL FINAL ADALAH 73.95 %