# 1. LOAD LIBRARY
library(MASS)      
library(brant)     
library(dplyr)     
library(ggplot2)   
library(caret)     
library(DT)        
library(knitr)     
library(kableExtra)
library(corrplot)  
library(car)       

# 2. DATA PREPARATION & CLEANING
# Load the heart-disease data set. Text columns are read as factors so the
# modelling functions further down treat them as categorical predictors.
df <- read.csv("HeartDiseaseTrain-Test.csv", stringsAsFactors = TRUE)

# --- A. MISSING-VALUE CHECK ---
# One row per column with its NA count. `row.names = NULL` drops the row names
# that data.frame() would otherwise inherit from the named colSums() vector,
# which made every variable name appear twice in the rendered table.
missing_data <- data.frame(
  Variabel = names(df),
  Jumlah_NA = colSums(is.na(df)),
  row.names = NULL
)
kable(missing_data, caption = "<b>Tabel 1.1: Pengecekan Missing Value</b>") %>%
  kable_styling(bootstrap_options = "condensed", full_width = FALSE)
## Tabel 1.1: Pengecekan Missing Value
## Variabel Jumlah_NA
## age age 0
## sex sex 0
## chest_pain_type chest_pain_type 0
## resting_blood_pressure resting_blood_pressure 0
## cholestoral cholestoral 0
## fasting_blood_sugar fasting_blood_sugar 0
## rest_ecg rest_ecg 0
## Max_heart_rate Max_heart_rate 0
## exercise_induced_angina exercise_induced_angina 0
## oldpeak oldpeak 0
## slope slope 0
## vessels_colored_by_flourosopy vessels_colored_by_flourosopy 0
## thalassemia thalassemia 0
## target target 0
# --- B. OUTLIER CHECK (visual) ---
# Box plots of two numeric variables, age and maximum heart rate, drawn side
# by side.
# par() returns the previous values of the settings it changes, so keep them
# and restore exactly that state afterwards instead of assuming a 1 x 1 layout.
old_par <- par(mfrow = c(1, 2)) # split the device into 1 row x 2 columns
boxplot(df$age, main = "Outlier: Age", col = "lightblue")
boxplot(df$Max_heart_rate, main = "Outlier: Max Heart Rate", col = "lightcoral")

par(old_par) # back to the layout that was active before this section

# --- C. ORDINAL TARGET ---
# Derive a three-level outcome from the binary `target` column and the vessel
# count:
#   target == 0                                        -> "Sehat"
#   target == 1 and vessels_colored_by_flourosopy == "Zero" -> "Risiko Rendah"
#   target == 1 and vessels_colored_by_flourosopy != "Zero" -> "Risiko Tinggi"
# Rows matching none of the conditions are left as NA by case_when().
# NOTE(review): vessels_colored_by_flourosopy is used to build the outcome and
# is also kept as a predictor in df_final -- confirm this is intended.
# (The original script repeated this whole section, including a second
# read.csv() of the same file; the redundant copy has been removed.)
df_model <- df %>%
  mutate(
    target_ordinal = case_when(
      target == 0 ~ "Sehat",
      target == 1 & vessels_colored_by_flourosopy == "Zero" ~ "Risiko Rendah",
      target == 1 & vessels_colored_by_flourosopy != "Zero" ~ "Risiko Tinggi"
    )
  )

# Ordered factor so that polr() sees Sehat < Risiko Rendah < Risiko Tinggi.
df_model$target_ordinal <- factor(
  df_model$target_ordinal,
  levels = c("Sehat", "Risiko Rendah", "Risiko Tinggi"),
  ordered = TRUE
)

# The modelling data drops the original binary target.
df_final <- df_model %>% select(-target)

# 3. DATA TABLE (interactive, with a styled caption)
# Preview the first 50 rows of the modelling data, five rows per page.
df_final %>%
  head(50) %>%
  datatable(
    caption = htmltools::tags$caption(
      style = 'caption-side: top; text-align: left; color: black; font-weight: bold; font-size: 18px;',
      'Tabel 1: Preview Dataset Ordinal'
    ),
    options = list(pageLength = 5)
  )
# 4. HEATMAP (title set inside the plotting call)
# Correlation matrix of every numeric column of the raw data (the numeric
# `target` column is still part of `df` here), drawn as the upper triangle
# with the coefficients printed in each cell.
# select(where(...)) replaces the superseded select_if().
num_data <- df %>% select(where(is.numeric))
res_cor <- cor(num_data)
corrplot(
  res_cor,
  method = "color",
  type = "upper",
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  number.cex = 0.7,
  col = colorRampPalette(
    c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
  )(200),
  title = "\n\nGrafik 1: Heatmap Korelasi Antar Variabel Numerik",
  mar = c(0, 0, 3, 0) # leave room at the top for the title
)

# 5. VIF TEST (title via kable caption)
# vif() is called on a linear model, so the ordered outcome is converted to
# its integer codes with as.numeric() just to be able to fit lm() on all
# predictors in df_final.
vif_model <- lm(as.numeric(target_ordinal) ~ ., data = df_final)
vif_values <- vif(vif_model)
kable(
  as.data.frame(vif_values),
  caption = "<b style='color:black; font-size:16px;'>Tabel 2: Hasil Pengujian Multikolinearitas (VIF)</b>",
  escape = FALSE
) %>%
  # FALSE spelled out: `F` is an ordinary variable that can be reassigned.
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
## Tabel 2: Hasil Pengujian Multikolinearitas (VIF)
## GVIF Df GVIF^(1/(2*Df))
## age 1.551126 1 1.245442
## sex 1.355486 1 1.164253
## chest_pain_type 1.806342 3 1.103570
## resting_blood_pressure 1.232774 1 1.110303
## cholestoral 1.170485 1 1.081889
## fasting_blood_sugar 1.138739 1 1.067117
## rest_ecg 1.217927 2 1.050523
## Max_heart_rate 1.771458 1 1.330961
## exercise_induced_angina 1.517459 1 1.231852
## oldpeak 1.937646 1 1.391993
## slope 1.971660 2 1.184972
## vessels_colored_by_flourosopy 1.790937 4 1.075561
## thalassemia 1.692594 3 1.091672
# 6. REGRESSION (title via kable caption)
# Fit the ordinal (proportional-odds) logistic model on every predictor in
# df_final. Hess = TRUE asks polr() to return the Hessian, which summary()
# needs for the standard errors. step() then searches in both directions.
model_awal <- polr(target_ordinal ~ ., data = df_final, Hess = TRUE)
model_best <- step(model_awal, direction = "both", trace = 0)

# The polr summary has no p-value column; compute two-sided p-values from the
# reported t values using the standard normal distribution.
ctable <- coef(summary(model_best))
p_val <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
hasil_stat <- cbind(ctable, "p_value" = round(p_val, 5))

kable(
  hasil_stat,
  caption = "<b style='color:black; font-size:16px;'>Tabel 3: Estimasi Parameter Model dan Signifikansi (P-Value)</b>",
  escape = FALSE
) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered")) %>%
  # Highlight rows significant at the 5% level. The test uses the unrounded
  # p-values so that a value just below 0.05 is not rounded up to 0.05 and
  # skipped, and it no longer depends on the p-value being column 4.
  row_spec(which(p_val < 0.05), bold = TRUE, color = "darkblue")
## Tabel 3: Estimasi Parameter Model dan Signifikansi (P-Value)
## Value Std. Error t value p_value
## age 0.0274092 0.0101949 2.6885082 0.00718
## sexMale -1.1089123 0.1840103 -6.0263591 0.00000
## chest_pain_typeAtypical angina -1.0582080 0.3178129 -3.3296574 0.00087
## chest_pain_typeNon-anginal pain -0.4853027 0.2949815 -1.6451969 0.09993
## chest_pain_typeTypical angina -2.0715989 0.3036428 -6.8224855 0.00000
## resting_blood_pressure -0.0119050 0.0047933 -2.4836937 0.01300
## rest_ecgNormal 0.7032483 0.8870178 0.7928232 0.42788
## rest_ecgST-T wave abnormality 1.1087715 0.8892887 1.2468071 0.21247
## Max_heart_rate 0.0132807 0.0045748 2.9030271 0.00370
## exercise_induced_anginaYes -0.5884262 0.2006744 -2.9322435 0.00337
## oldpeak -0.4104217 0.1011837 -4.0562060 0.00005
## slopeFlat -0.9230723 0.1912663 -4.8261115 0.00000
## slopeUpsloping -0.1381025 0.3640716 -0.3793279 0.70444
## vessels_colored_by_flourosopyOne -3.9665686 0.7437438 -5.3332457 0.00000
## vessels_colored_by_flourosopyThree -4.9114073 0.8420629 -5.8325893 0.00000
## vessels_colored_by_flourosopyTwo -5.1864755 0.8026195 -6.4619355 0.00000
## vessels_colored_by_flourosopyZero -4.1854375 0.7295177 -5.7372669 0.00000
## thalassemiaNo -1.2745430 0.9821584 -1.2976960 0.19439
## thalassemiaNormal -0.1657682 0.3517070 -0.4713248 0.63741
## thalassemiaReversable Defect -1.2553513 0.1936385 -6.4829623 0.00000
## Sehat|Risiko Rendah -5.0917332 1.6597825 -3.0677111 0.00216
## Risiko Rendah|Risiko Tinggi -1.6014438 1.6466242 -0.9725618 0.33077
# 7. BRANT TEST (kable must be used afterwards so a table shows up in the
# rendered output)
# Run the Brant test on the stepwise-selected model; the call prints its own
# console summary and the result is kept for the table built further down.
res_brant <- brant(model_best)
## -------------------------------------------------------------------- 
## Test for             X2  df  probability 
## -------------------------------------------------------------------- 
## Omnibus                  2023.53 20  0
## age                  0   1   0.97
## sexMale                  2.95    1   0.09
## chest_pain_typeAtypical angina   4.65    1   0.03
## chest_pain_typeNon-anginal pain  0.44    1   0.51
## chest_pain_typeTypical angina    6.2 1   0.01
## resting_blood_pressure       4.59    1   0.03
## rest_ecgNormal               0   1   1
## rest_ecgST-T wave abnormality    0   1   1
## Max_heart_rate               4.9 1   0.03
## exercise_induced_anginaYes       8.04    1   0
## oldpeak                  5.16    1   0.02
## slopeFlat                0.76    1   0.38
## slopeUpsloping               5.1 1   0.02
## vessels_colored_by_flourosopyOne 1.19    1   0.28
## vessels_colored_by_flourosopyThree   0   1   0.98
## vessels_colored_by_flourosopyTwo 1.1 1   0.29
## vessels_colored_by_flourosopyZero    0   1   0.98
## thalassemiaNo                0   1   1
## thalassemiaNormal            0   1   0.99
## thalassemiaReversable Defect     0.34    1   0.56
## -------------------------------------------------------------------- 
## 
## H0: Parallel Regression Assumption holds
# Strip the class from the Brant result and convert it to a plain data frame
# so only the numbers remain and it can be rendered as a tidy table.
brant_tabel <- as.data.frame(unclass(res_brant))

# Render through kable so the table appears in the published (RPubs) output.
kable(
  brant_tabel,
  caption = "<b style='color:black; font-size:16px;'>Tabel 4: Hasil Uji Asumsi Parallel Odds (Brant Test)</b>",
  escape = FALSE
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE, # TRUE/FALSE spelled out: T and F can be reassigned
    position = "left"
  ) %>%
  row_spec(1, bold = TRUE, background = "#fdebd0") # mark the first (Omnibus) row
## Tabel 4: Hasil Uji Asumsi Parallel Odds (Brant Test)
## X2 df probability
## Omnibus 2023.5337140 20 0.0000000
## age 0.0015954 1 0.9681389
## sexMale 2.9517585 1 0.0857842
## chest_pain_typeAtypical angina 4.6489829 1 0.0310720
## chest_pain_typeNon-anginal pain 0.4391236 1 0.5075458
## chest_pain_typeTypical angina 6.1978738 1 0.0127904
## resting_blood_pressure 4.5856680 1 0.0322404
## rest_ecgNormal 0.0000050 1 0.9982159
## rest_ecgST-T wave abnormality 0.0000069 1 0.9979116
## Max_heart_rate 4.9020855 1 0.0268243
## exercise_induced_anginaYes 8.0375075 1 0.0045819
## oldpeak 5.1624878 1 0.0230798
## slopeFlat 0.7567318 1 0.3843532
## slopeUpsloping 5.0983879 1 0.0239481
## vessels_colored_by_flourosopyOne 1.1902295 1 0.2752833
## vessels_colored_by_flourosopyThree 0.0004677 1 0.9827455
## vessels_colored_by_flourosopyTwo 1.1021589 1 0.2937928
## vessels_colored_by_flourosopyZero 0.0008365 1 0.9769264
## thalassemiaNo 0.0000001 1 0.9997668
## thalassemiaNormal 0.0000571 1 0.9939713
## thalassemiaReversable Defect 0.3436397 1 0.5577358
# 8. CONFUSION MATRIX (title via kable caption)
# Predict the class for every row of df_final and cross-tabulate against the
# observed outcome.
# NOTE(review): df_final is the same data the model was fitted on, so these
# figures are in-sample -- confirm whether a hold-out evaluation is wanted.
prediksi <- predict(model_best, df_final)
conf_matrix <- confusionMatrix(prediksi, df_final$target_ordinal)

kable(
  as.data.frame(conf_matrix$table),
  caption = "<b style='color:black; font-size:16px;'>Tabel 5: Confusion Matrix Performa Model</b>",
  escape = FALSE
) %>%
  # FALSE spelled out: `F` is an ordinary variable that can be reassigned.
  kable_styling(bootstrap_options = "bordered", full_width = FALSE)
## Tabel 5: Confusion Matrix Performa Model
## Prediction Reference Freq
## Sehat Sehat 407
## Risiko Rendah Sehat 92
## Risiko Tinggi Sehat 0
## Sehat Risiko Rendah 73
## Risiko Rendah Risiko Rendah 323
## Risiko Tinggi Risiko Rendah 19
## Sehat Risiko Tinggi 12
## Risiko Rendah Risiko Tinggi 71
## Risiko Tinggi Risiko Tinggi 28
# 9. FINAL ACCURACY (custom text output)
# Take the overall accuracy from the confusion-matrix result, round it to four
# decimals and express it as a percentage, then print it in one line.
akurasi_final <- 100 * round(conf_matrix$overall["Accuracy"], digits = 4)
cat("TINGKAT AKURASI MODEL FINAL ADALAH", akurasi_final, "%", sep = " ")
## TINGKAT AKURASI MODEL FINAL ADALAH 73.95 %