Analisis Kesehatan Janin melalui CTG Dengan Metode Linear Diskriminan Analisis dan Multinomial Regresi Logistik

# Set working directory
setwd("D:/analisis multivariat_smt4")

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: ggplot2

## Loading required package: lattice

library(UBL)

## Warning: package 'UBL' was built under R version 4.4.3

## Loading required package: MBA

## Warning: package 'MBA' was built under R version 4.4.3

## Loading required package: gstat

## Warning: package 'gstat' was built under R version 4.4.3

## Loading required package: automap

## Warning: package 'automap' was built under R version 4.4.3

## Loading required package: sp

## Warning: package 'sp' was built under R version 4.4.3

## Loading required package: randomForest

## Warning: package 'randomForest' was built under R version 4.4.3

## randomForest 4.7-1.2

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:ggplot2':
## 
##     margin

## The following object is masked from 'package:dplyr':
## 
##     combine

library(factoextra)

## Warning: package 'factoextra' was built under R version 4.4.3

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(MASS)

## Warning: package 'MASS' was built under R version 4.4.3

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(nnet)

## Warning: package 'nnet' was built under R version 4.4.3

library(corrplot)

## Warning: package 'corrplot' was built under R version 4.4.3

## corrplot 0.95 loaded

library(psych)

## Warning: package 'psych' was built under R version 4.4.3

## 
## Attaching package: 'psych'

## The following object is masked from 'package:UBL':
## 
##     phi

## The following object is masked from 'package:randomForest':
## 
##     outlier

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(mvnormtest)
library(biotools)

## Warning: package 'biotools' was built under R version 4.4.3

## ---
## biotools version 4.3

library(ggplot2)
library(tidyr)

# Set seed for reproducibility
set.seed(42)

Load Dataset

data <- read.csv("fetal_health.csv")
summary(data)

##  baseline.value  accelerations      fetal_movement     uterine_contractions
##  Min.   :106.0   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000    
##  1st Qu.:126.0   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.002000    
##  Median :133.0   Median :0.002000   Median :0.000000   Median :0.004000    
##  Mean   :133.3   Mean   :0.003178   Mean   :0.009481   Mean   :0.004366    
##  3rd Qu.:140.0   3rd Qu.:0.006000   3rd Qu.:0.003000   3rd Qu.:0.007000    
##  Max.   :160.0   Max.   :0.019000   Max.   :0.481000   Max.   :0.015000    
##  light_decelerations severe_decelerations prolongued_decelerations
##  Min.   :0.000000    Min.   :0.000e+00    Min.   :0.0000000       
##  1st Qu.:0.000000    1st Qu.:0.000e+00    1st Qu.:0.0000000       
##  Median :0.000000    Median :0.000e+00    Median :0.0000000       
##  Mean   :0.001889    Mean   :3.293e-06    Mean   :0.0001585       
##  3rd Qu.:0.003000    3rd Qu.:0.000e+00    3rd Qu.:0.0000000       
##  Max.   :0.015000    Max.   :1.000e-03    Max.   :0.0050000       
##  abnormal_short_term_variability mean_value_of_short_term_variability
##  Min.   :12.00                   Min.   :0.200                       
##  1st Qu.:32.00                   1st Qu.:0.700                       
##  Median :49.00                   Median :1.200                       
##  Mean   :46.99                   Mean   :1.333                       
##  3rd Qu.:61.00                   3rd Qu.:1.700                       
##  Max.   :87.00                   Max.   :7.000                       
##  percentage_of_time_with_abnormal_long_term_variability
##  Min.   : 0.000                                        
##  1st Qu.: 0.000                                        
##  Median : 0.000                                        
##  Mean   : 9.847                                        
##  3rd Qu.:11.000                                        
##  Max.   :91.000                                        
##  mean_value_of_long_term_variability histogram_width  histogram_min   
##  Min.   : 0.000                      Min.   :  3.00   Min.   : 50.00  
##  1st Qu.: 4.600                      1st Qu.: 37.00   1st Qu.: 67.00  
##  Median : 7.400                      Median : 67.50   Median : 93.00  
##  Mean   : 8.188                      Mean   : 70.45   Mean   : 93.58  
##  3rd Qu.:10.800                      3rd Qu.:100.00   3rd Qu.:120.00  
##  Max.   :50.700                      Max.   :180.00   Max.   :159.00  
##  histogram_max histogram_number_of_peaks histogram_number_of_zeroes
##  Min.   :122   Min.   : 0.000            Min.   : 0.0000           
##  1st Qu.:152   1st Qu.: 2.000            1st Qu.: 0.0000           
##  Median :162   Median : 3.000            Median : 0.0000           
##  Mean   :164   Mean   : 4.068            Mean   : 0.3236           
##  3rd Qu.:174   3rd Qu.: 6.000            3rd Qu.: 0.0000           
##  Max.   :238   Max.   :18.000            Max.   :10.0000           
##  histogram_mode  histogram_mean  histogram_median histogram_variance
##  Min.   : 60.0   Min.   : 73.0   Min.   : 77.0    Min.   :  0.00    
##  1st Qu.:129.0   1st Qu.:125.0   1st Qu.:129.0    1st Qu.:  2.00    
##  Median :139.0   Median :136.0   Median :139.0    Median :  7.00    
##  Mean   :137.5   Mean   :134.6   Mean   :138.1    Mean   : 18.81    
##  3rd Qu.:148.0   3rd Qu.:145.0   3rd Qu.:148.0    3rd Qu.: 24.00    
##  Max.   :187.0   Max.   :182.0   Max.   :186.0    Max.   :269.00    
##  histogram_tendency  fetal_health  
##  Min.   :-1.0000    Min.   :1.000  
##  1st Qu.: 0.0000    1st Qu.:1.000  
##  Median : 0.0000    Median :1.000  
##  Mean   : 0.3203    Mean   :1.304  
##  3rd Qu.: 1.0000    3rd Qu.:1.000  
##  Max.   : 1.0000    Max.   :3.000

Korelasi Awal

# Mengambil hanya kolom numerik
numeric_data <- data[, sapply(data, is.numeric)]

cor_matrix <- cor(numeric_data, use = "pairwise.complete.obs")

# Visualisasi 
corrplot(cor_matrix, method = "color", type = "upper", 
         tl.col = "black", tl.srt = 45, 
         addCoef.col = "black", number.cex = 0.2,
         tl.cex = 0.4,
         cl.cex = 0.7,
         mar = c(0, 0, 2, 0),
         title = "Correlation Matrix Before Preprocessing")

Missing Value

missing_values <- colSums(is.na(data))
cat("Missing Values:\n")

## Missing Values:

print(missing_values)

##                                         baseline.value 
##                                                      0 
##                                          accelerations 
##                                                      0 
##                                         fetal_movement 
##                                                      0 
##                                   uterine_contractions 
##                                                      0 
##                                    light_decelerations 
##                                                      0 
##                                   severe_decelerations 
##                                                      0 
##                               prolongued_decelerations 
##                                                      0 
##                        abnormal_short_term_variability 
##                                                      0 
##                   mean_value_of_short_term_variability 
##                                                      0 
## percentage_of_time_with_abnormal_long_term_variability 
##                                                      0 
##                    mean_value_of_long_term_variability 
##                                                      0 
##                                        histogram_width 
##                                                      0 
##                                          histogram_min 
##                                                      0 
##                                          histogram_max 
##                                                      0 
##                              histogram_number_of_peaks 
##                                                      0 
##                             histogram_number_of_zeroes 
##                                                      0 
##                                         histogram_mode 
##                                                      0 
##                                         histogram_mean 
##                                                      0 
##                                       histogram_median 
##                                                      0 
##                                     histogram_variance 
##                                                      0 
##                                     histogram_tendency 
##                                                      0 
##                                           fetal_health 
##                                                      0

# Bar chart untuk missing values
missing_data <- data.frame(
  Variable = names(missing_values),
  Missing_Count = missing_values
)

p_missing <- ggplot(missing_data, aes(x = Variable, y = Missing_Count, fill = Variable)) +
  geom_bar(stat = "identity") +
  labs(title = "Missing Values per Variable", x = "Variable", y = "Count of Missing Values") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

p_missing

Deteksi Outlier

detect_outliers <- function(x) {
  Q1 <- quantile(x, 0.25)
  Q3 <- quantile(x, 0.75)
  IQR <- Q3 - Q1
  lower_bound <- Q1 - 1.5 * IQR
  upper_bound <- Q3 + 1.5 * IQR
  outliers <- sum(x < lower_bound | x > upper_bound, na.rm = TRUE)
  return(outliers)
}

# Hitung outliers untuk tiap kolom (tanpa target)
outliers_report <- sapply(data[, -ncol(data)], detect_outliers)
cat("Outliers Count per Feature:\n")

## Outliers Count per Feature:

print(outliers_report)

##                                         baseline.value 
##                                                      0 
##                                          accelerations 
##                                                     14 
##                                         fetal_movement 
##                                                    307 
##                                   uterine_contractions 
##                                                      1 
##                                    light_decelerations 
##                                                    150 
##                                   severe_decelerations 
##                                                      7 
##                               prolongued_decelerations 
##                                                    178 
##                        abnormal_short_term_variability 
##                                                      0 
##                   mean_value_of_short_term_variability 
##                                                     70 
## percentage_of_time_with_abnormal_long_term_variability 
##                                                    309 
##                    mean_value_of_long_term_variability 
##                                                     71 
##                                        histogram_width 
##                                                      0 
##                                          histogram_min 
##                                                      0 
##                                          histogram_max 
##                                                     24 
##                              histogram_number_of_peaks 
##                                                     19 
##                             histogram_number_of_zeroes 
##                                                    502 
##                                         histogram_mode 
##                                                     73 
##                                         histogram_mean 
##                                                     45 
##                                       histogram_median 
##                                                     28 
##                                     histogram_variance 
##                                                    184 
##                                     histogram_tendency 
##                                                      0

Boxplot Sebelum Preprocessing

data_long <- pivot_longer(data[, -ncol(data)], cols = everything(), names_to = "Variable", values_to = "Value")

# Boxplot
p_box_before <- ggplot(data_long, aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

p_box_before

Winsorizing untuk Mengurangi Outlier

winsorize <- function(x) {
  Q1 <- quantile(x, 0.25)
  Q3 <- quantile(x, 0.75)
  IQR <- Q3 - Q1
  lower_bound <- Q1 - 1.5 * IQR
  upper_bound <- Q3 + 1.5 * IQR
  x[x < lower_bound] <- lower_bound
  x[x > upper_bound] <- upper_bound
  return(x)
}

# Terapkan winsorizing ke semua fitur kecuali target
data_winsorized <- data
data_winsorized[, -ncol(data)] <- lapply(data[, -ncol(data)], winsorize)

# Cek outlier setelah winsorizing
outliers_report_winsorized <- sapply(data_winsorized[, -ncol(data_winsorized)], detect_outliers)
cat("Outliers Count per Feature After Winsorizing:\n")

## Outliers Count per Feature After Winsorizing:

print(outliers_report_winsorized)

##                                         baseline.value 
##                                                      0 
##                                          accelerations 
##                                                      0 
##                                         fetal_movement 
##                                                      0 
##                                   uterine_contractions 
##                                                      0 
##                                    light_decelerations 
##                                                      0 
##                                   severe_decelerations 
##                                                      0 
##                               prolongued_decelerations 
##                                                      0 
##                        abnormal_short_term_variability 
##                                                      0 
##                   mean_value_of_short_term_variability 
##                                                      0 
## percentage_of_time_with_abnormal_long_term_variability 
##                                                      0 
##                    mean_value_of_long_term_variability 
##                                                      0 
##                                        histogram_width 
##                                                      0 
##                                          histogram_min 
##                                                      0 
##                                          histogram_max 
##                                                      0 
##                              histogram_number_of_peaks 
##                                                      0 
##                             histogram_number_of_zeroes 
##                                                      0 
##                                         histogram_mode 
##                                                      0 
##                                         histogram_mean 
##                                                      0 
##                                       histogram_median 
##                                                      0 
##                                     histogram_variance 
##                                                      0 
##                                     histogram_tendency 
##                                                      0

Visualisasi Boxplot Setelah Winsorizing

data_winsorized_long <- pivot_longer(data_winsorized[, -ncol(data_winsorized)], 
                                     cols = everything(), 
                                     names_to = "Variable", values_to = "Value")

# Boxplot setelah winsorizing
p_box_after <- ggplot(data_winsorized_long, aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot() +
  labs(title = "Box Plot of Features After Winsorizing", x = "Variable", y = "Value") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

p_box_after

Pengecekan dan Penghapusan Kolom Konstan atau Nol Varians

# Hitung varians tiap kolom numerik
variances <- apply(data_winsorized[, -ncol(data_winsorized)], 2, var)

# Deteksi kolom dengan varians nol atau NA
zero_variance_cols <- names(variances[variances == 0 | is.na(variances)])

if (length(zero_variance_cols) > 0) {
  cat("Removing constant or zero-variance columns:", zero_variance_cols, "\n")
  data_winsorized <- data_winsorized[, !names(data_winsorized) %in% zero_variance_cols]
} else {
  cat("No constant or zero-variance columns detected.\n")
}

## Removing constant or zero-variance columns: severe_decelerations prolongued_decelerations histogram_number_of_zeroes

Deteksi dan Penghapusan Baris Duplikat

# Jumlah baris duplikat
duplicates <- sum(duplicated(data_winsorized))
cat("Number of Duplicate Rows:\n")

## Number of Duplicate Rows:

print(duplicates)

## [1] 17

# Hapus baris duplikat
data_winsorized <- data_winsorized[!duplicated(data_winsorized), ]
cat("Dataset Dimensions after Removing Duplicates:\n")

## Dataset Dimensions after Removing Duplicates:

print(dim(data_winsorized))

## [1] 2109   19

Penghapusan Kolom yang Sangat Berkorelasi

# Fungsi hapus fitur yang sangat berkorelasi
remove_correlated <- function(X, threshold = 0.8) {
  cor_matrix <- cor(X, use = "complete.obs")
  high_cor <- which(abs(cor_matrix) > threshold & upper.tri(cor_matrix), arr.ind = TRUE)
  if (nrow(high_cor) > 0) {
    to_remove <- unique(high_cor[, 2])
    cat("Removing highly correlated variables:", colnames(X)[to_remove], "\n")
    X <- X[, -to_remove, drop = FALSE]
  } else {
    cat("No highly correlated variables detected.\n")
  }
  return(X)
}

# Terapkan pada data winsorized tanpa target
numerical_features <- data_winsorized[, -which(names(data_winsorized) == "fetal_health")]
numerical_features <- remove_correlated(numerical_features, threshold = 0.8)

## Removing highly correlated variables: histogram_min histogram_mean histogram_median

# Gabungkan kembali dengan target
data_winsorized <- cbind(numerical_features, fetal_health = data_winsorized$fetal_health)

# Tampilkan matriks korelasi setelah penghapusan
cor_matrix <- cor(numerical_features, use = "complete.obs")
cat("Correlation Matrix After Removing Highly Correlated Columns:\n")

## Correlation Matrix After Removing Highly Correlated Columns:

print(round(cor_matrix, 3))

##                                                        baseline.value
## baseline.value                                                  1.000
## accelerations                                                  -0.081
## fetal_movement                                                 -0.024
## uterine_contractions                                           -0.148
## light_decelerations                                            -0.162
## abnormal_short_term_variability                                 0.303
## mean_value_of_short_term_variability                           -0.312
## percentage_of_time_with_abnormal_long_term_variability          0.332
## mean_value_of_long_term_variability                            -0.046
## histogram_width                                                -0.146
## histogram_max                                                   0.294
## histogram_number_of_peaks                                      -0.111
## histogram_mode                                                  0.765
## histogram_variance                                             -0.175
## histogram_tendency                                              0.295
##                                                        accelerations
## baseline.value                                                -0.081
## accelerations                                                  1.000
## fetal_movement                                                 0.088
## uterine_contractions                                           0.088
## light_decelerations                                           -0.098
## abnormal_short_term_variability                               -0.280
## mean_value_of_short_term_variability                           0.232
## percentage_of_time_with_abnormal_long_term_variability        -0.425
## mean_value_of_long_term_variability                           -0.157
## histogram_width                                                0.299
## histogram_max                                                  0.411
## histogram_number_of_peaks                                      0.194
## histogram_mode                                                 0.247
## histogram_variance                                             0.204
## histogram_tendency                                             0.030
##                                                        fetal_movement
## baseline.value                                                 -0.024
## accelerations                                                   0.088
## fetal_movement                                                  1.000
## uterine_contractions                                           -0.317
## light_decelerations                                            -0.018
## abnormal_short_term_variability                                 0.125
## mean_value_of_short_term_variability                            0.094
## percentage_of_time_with_abnormal_long_term_variability         -0.032
## mean_value_of_long_term_variability                            -0.029
## histogram_width                                                 0.182
## histogram_max                                                   0.127
## histogram_number_of_peaks                                       0.200
## histogram_mode                                                  0.014
## histogram_variance                                              0.098
## histogram_tendency                                              0.010
##                                                        uterine_contractions
## baseline.value                                                       -0.148
## accelerations                                                         0.088
## fetal_movement                                                       -0.317
## uterine_contractions                                                  1.000
## light_decelerations                                                   0.291
## abnormal_short_term_variability                                      -0.234
## mean_value_of_short_term_variability                                  0.309
## percentage_of_time_with_abnormal_long_term_variability               -0.296
## mean_value_of_long_term_variability                                  -0.067
## histogram_width                                                       0.140
## histogram_max                                                         0.112
## histogram_number_of_peaks                                             0.084
## histogram_mode                                                       -0.105
## histogram_variance                                                    0.252
## histogram_tendency                                                   -0.069
##                                                        light_decelerations
## baseline.value                                                      -0.162
## accelerations                                                       -0.098
## fetal_movement                                                      -0.018
## uterine_contractions                                                 0.291
## light_decelerations                                                  1.000
## abnormal_short_term_variability                                     -0.135
## mean_value_of_short_term_variability                                 0.586
## percentage_of_time_with_abnormal_long_term_variability              -0.314
## mean_value_of_long_term_variability                                 -0.246
## histogram_width                                                      0.546
## histogram_max                                                        0.220
## histogram_number_of_peaks                                            0.426
## histogram_mode                                                      -0.305
## histogram_variance                                                   0.676
## histogram_tendency                                                   0.032
##                                                        abnormal_short_term_variability
## baseline.value                                                                   0.303
## accelerations                                                                   -0.280
## fetal_movement                                                                   0.125
## uterine_contractions                                                            -0.234
## light_decelerations                                                             -0.135
## abnormal_short_term_variability                                                  1.000
## mean_value_of_short_term_variability                                            -0.452
## percentage_of_time_with_abnormal_long_term_variability                           0.456
## mean_value_of_long_term_variability                                             -0.333
## histogram_width                                                                 -0.261
## histogram_max                                                                   -0.125
## histogram_number_of_peaks                                                       -0.170
## histogram_mode                                                                   0.084
## histogram_variance                                                              -0.211
## histogram_tendency                                                              -0.008
##                                                        mean_value_of_short_term_variability
## baseline.value                                                                       -0.312
## accelerations                                                                         0.232
## fetal_movement                                                                        0.094
## uterine_contractions                                                                  0.309
## light_decelerations                                                                   0.586
## abnormal_short_term_variability                                                      -0.452
## mean_value_of_short_term_variability                                                  1.000
## percentage_of_time_with_abnormal_long_term_variability                               -0.565
## mean_value_of_long_term_variability                                                   0.016
## histogram_width                                                                       0.685
## histogram_max                                                                         0.401
## histogram_number_of_peaks                                                             0.522
## histogram_mode                                                                       -0.312
## histogram_variance                                                                    0.655
## histogram_tendency                                                                   -0.062
##                                                        percentage_of_time_with_abnormal_long_term_variability
## baseline.value                                                                                          0.332
## accelerations                                                                                          -0.425
## fetal_movement                                                                                         -0.032
## uterine_contractions                                                                                   -0.296
## light_decelerations                                                                                    -0.314
## abnormal_short_term_variability                                                                         0.456
## mean_value_of_short_term_variability                                                                   -0.565
## percentage_of_time_with_abnormal_long_term_variability                                                  1.000
## mean_value_of_long_term_variability                                                                    -0.130
## histogram_width                                                                                        -0.462
## histogram_max                                                                                          -0.272
## histogram_number_of_peaks                                                                              -0.289
## histogram_mode                                                                                          0.218
## histogram_variance                                                                                     -0.400
## histogram_tendency                                                                                      0.049
##                                                        mean_value_of_long_term_variability
## baseline.value                                                                      -0.046
## accelerations                                                                       -0.157
## fetal_movement                                                                      -0.029
## uterine_contractions                                                                -0.067
## light_decelerations                                                                 -0.246
## abnormal_short_term_variability                                                     -0.333
## mean_value_of_short_term_variability                                                 0.016
## percentage_of_time_with_abnormal_long_term_variability                              -0.130
## mean_value_of_long_term_variability                                                  1.000
## histogram_width                                                                      0.080
## histogram_max                                                                       -0.013
## histogram_number_of_peaks                                                            0.044
## histogram_mode                                                                       0.041
## histogram_variance                                                                  -0.159
## histogram_tendency                                                                   0.148
##                                                        histogram_width
## baseline.value                                                  -0.146
## accelerations                                                    0.299
## fetal_movement                                                   0.182
## uterine_contractions                                             0.140
## light_decelerations                                              0.546
## abnormal_short_term_variability                                 -0.261
## mean_value_of_short_term_variability                             0.685
## percentage_of_time_with_abnormal_long_term_variability          -0.462
## mean_value_of_long_term_variability                              0.080
## histogram_width                                                  1.000
## histogram_max                                                    0.689
## histogram_number_of_peaks                                        0.752
## histogram_mode                                                  -0.134
## histogram_variance                                               0.721
## histogram_tendency                                               0.117
##                                                        histogram_max
## baseline.value                                                 0.294
## accelerations                                                  0.411
## fetal_movement                                                 0.127
## uterine_contractions                                           0.112
## light_decelerations                                            0.220
## abnormal_short_term_variability                               -0.125
## mean_value_of_short_term_variability                           0.401
## percentage_of_time_with_abnormal_long_term_variability        -0.272
## mean_value_of_long_term_variability                           -0.013
## histogram_width                                                0.689
## histogram_max                                                  1.000
## histogram_number_of_peaks                                      0.524
## histogram_mode                                                 0.312
## histogram_variance                                             0.484
## histogram_tendency                                            -0.130
##                                                        histogram_number_of_peaks
## baseline.value                                                            -0.111
## accelerations                                                              0.194
## fetal_movement                                                             0.200
## uterine_contractions                                                       0.084
## light_decelerations                                                        0.426
## abnormal_short_term_variability                                           -0.170
## mean_value_of_short_term_variability                                       0.522
## percentage_of_time_with_abnormal_long_term_variability                    -0.289
## mean_value_of_long_term_variability                                        0.044
## histogram_width                                                            0.752
## histogram_max                                                              0.524
## histogram_number_of_peaks                                                  1.000
## histogram_mode                                                            -0.083
## histogram_variance                                                         0.548
## histogram_tendency                                                         0.112
##                                                        histogram_mode
## baseline.value                                                  0.765
## accelerations                                                   0.247
## fetal_movement                                                  0.014
## uterine_contractions                                           -0.105
## light_decelerations                                            -0.305
## abnormal_short_term_variability                                 0.084
## mean_value_of_short_term_variability                           -0.312
## percentage_of_time_with_abnormal_long_term_variability          0.218
## mean_value_of_long_term_variability                             0.041
## histogram_width                                                -0.134
## histogram_max                                                   0.312
## histogram_number_of_peaks                                      -0.083
## histogram_mode                                                  1.000
## histogram_variance                                             -0.191
## histogram_tendency                                              0.405
##                                                        histogram_variance
## baseline.value                                                     -0.175
## accelerations                                                       0.204
## fetal_movement                                                      0.098
## uterine_contractions                                                0.252
## light_decelerations                                                 0.676
## abnormal_short_term_variability                                    -0.211
## mean_value_of_short_term_variability                                0.655
## percentage_of_time_with_abnormal_long_term_variability             -0.400
## mean_value_of_long_term_variability                                -0.159
## histogram_width                                                     0.721
## histogram_max                                                       0.484
## histogram_number_of_peaks                                           0.548
## histogram_mode                                                     -0.191
## histogram_variance                                                  1.000
## histogram_tendency                                                 -0.006
##                                                        histogram_tendency
## baseline.value                                                      0.295
## accelerations                                                       0.030
## fetal_movement                                                      0.010
## uterine_contractions                                               -0.069
## light_decelerations                                                 0.032
## abnormal_short_term_variability                                    -0.008
## mean_value_of_short_term_variability                               -0.062
## percentage_of_time_with_abnormal_long_term_variability              0.049
## mean_value_of_long_term_variability                                 0.148
## histogram_width                                                     0.117
## histogram_max                                                      -0.130
## histogram_number_of_peaks                                           0.112
## histogram_mode                                                      0.405
## histogram_variance                                                 -0.006
## histogram_tendency                                                  1.000

# Visualisasi korelasi
corrplot(cor_matrix, method = "color", type = "upper", 
         tl.col = "black", tl.srt = 45, 
         addCoef.col = "black", number.cex = 0.2,
         tl.cex = 0.4,
         cl.cex = 0.7,
         mar = c(0, 0, 2, 0), 
         title = "Correlation Matrix After Removing Highly Correlated Columns", 
         )

SMOTE untuk Menyeimbangkan Data

# Pastikan target sebagai faktor
data_winsorized$fetal_health <- as.factor(data_winsorized$fetal_health)

# Distribusi target sebelum SMOTE
cat("Distribution Without SMOTE:\n")

## Distribution Without SMOTE:

print(table(data_winsorized$fetal_health))

## 
##    1    2    3 
## 1645  291  173

# Terapkan SMOTE dengan package UBL
data_smote <- SmoteClassif(fetal_health ~ ., data_winsorized, C.perc = "balance")

# Pisah fitur dan target
X_smote <- data_smote[, -which(names(data_smote) == "fetal_health")]
y_smote <- as.factor(data_smote$fetal_health)

# Hapus fitur sangat berkorelasi di data SMOTE
X_smote <- remove_correlated(X_smote, threshold = 0.8)

## Removing highly correlated variables: histogram_number_of_peaks

# Cek kolom konstan di data SMOTE
variances_smote <- apply(X_smote, 2, var)
zero_variance_smote_cols <- names(variances_smote[variances_smote == 0 | is.na(variances_smote)])

if (length(zero_variance_smote_cols) > 0) {
  cat("Removing constant or zero-variance columns in SMOTE data:", zero_variance_smote_cols, "\n")
  X_smote <- X_smote[, !names(X_smote) %in% zero_variance_smote_cols]
} else {
  cat("No constant or zero-variance columns detected in SMOTE data.\n")
}

## No constant or zero-variance columns detected in SMOTE data.

# Distribution of target variable after SMOTE
cat("\nDistribution With SMOTE:\n")

## 
## Distribution With SMOTE:

print(table(y_smote))

## y_smote
##   1   2   3 
## 703 703 702

Visualization: Class distribution before and after SMOTE

dist_before <- as.data.frame(table(data_winsorized$fetal_health))
dist_after <- as.data.frame(table(y_smote))
dist_before$Dataset <- "Before SMOTE"
dist_after$Dataset <- "After SMOTE"
colnames(dist_before) <- c("Class", "Count", "Dataset")
colnames(dist_after) <- c("Class", "Count", "Dataset")
dist_data <- rbind(dist_before, dist_after)

p_dist <- ggplot(dist_data, aes(x = Class, y = Count, fill = Dataset)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Class Distribution Before and After SMOTE", x = "Fetal Health Class", y = "Count") +
  theme_minimal()
print(p_dist)

## PCA

# --- Dimensionality Reduction (PCA and FA) ---
# Standardize the features, handling potential NA/NaN issues
X_scaled <- scale(numerical_features, center = TRUE, scale = TRUE)
X_scaled[is.na(X_scaled)] <- 0 # Replace NA/NaN with 0 if any remain
X_smote_scaled <- scale(X_smote, center = TRUE, scale = TRUE)
X_smote_scaled[is.na(X_smote_scaled)] <- 0 # Replace NA/NaN with 0 if any remain

# E1: PCA without SMOTE
cat("\nE1 - PCA without SMOTE (Kaiser Rule)\n")

## 
## E1 - PCA without SMOTE (Kaiser Rule)

# Jalankan PCA
pca <- prcomp(X_scaled, scale. = FALSE)

# Hitung eigenvalues dari komponen PCA
eigenvalues <- pca$sdev^2

# Gunakan Kaiser Rule: pilih komponen dengan eigenvalue > 1
num_components <- sum(eigenvalues > 1)
cat("Jumlah komponen dengan eigenvalue > 1 (Kaiser Rule):", num_components, "\n")

## Jumlah komponen dengan eigenvalue > 1 (Kaiser Rule): 5

# Transformasi data menggunakan komponen tersebut
X_pca <- predict(pca)[, 1:num_components]

# Tampilkan proporsi variansi yang dijelaskan
cat("\nE1 - PCA Explained Variance Ratio (Without SMOTE):\n")

## 
## E1 - PCA Explained Variance Ratio (Without SMOTE):

print(summary(pca)$importance[2, 1:num_components])

##     PC1     PC2     PC3     PC4     PC5 
## 0.30936 0.15458 0.10820 0.09122 0.08444

# Jalankan PCA pada data hasil SMOTE yang telah diskalakan
pca_smote <- prcomp(X_smote_scaled, scale. = FALSE)

# Hitung eigenvalue
eigenvalues_smote <- pca_smote$sdev^2

# Tentukan jumlah komponen dengan eigenvalue > 1 (Kaiser Rule)
num_components_smote <- sum(eigenvalues_smote > 1)
cat("Jumlah komponen dengan eigenvalue > 1 (Kaiser Rule):", num_components_smote, "\n")

## Jumlah komponen dengan eigenvalue > 1 (Kaiser Rule): 5

# Transformasi data ke ruang PCA
X_smote_pca <- predict(pca_smote)[, 1:num_components_smote]

# Tampilkan variansi yang dijelaskan oleh komponen-komponen terpilih
cat("\nF1 - PCA Explained Variance Ratio (With SMOTE):\n")

## 
## F1 - PCA Explained Variance Ratio (With SMOTE):

print(summary(pca_smote)$importance[2, 1:num_components_smote])

##     PC1     PC2     PC3     PC4     PC5 
## 0.38888 0.14965 0.10227 0.08428 0.07405

# E2: Factor Analysis without SMOTE
cat("\nE2 - Factor Analysis without SMOTE\n")

## 
## E2 - Factor Analysis without SMOTE

fa_model <- fa(numerical_features, nfactors = 2, rotate = "varimax")
X_fa <- fa_model$scores
cat("\nE2 - Factor Loadings (Without SMOTE):\n")

## 
## E2 - Factor Loadings (Without SMOTE):

print(fa_model$loadings)

## 
## Loadings:
##                                                        MR1    MR2   
## baseline.value                                         -0.201  0.787
## accelerations                                           0.351  0.182
## fetal_movement                                          0.118       
## uterine_contractions                                    0.276 -0.170
## light_decelerations                                     0.594 -0.203
## abnormal_short_term_variability                        -0.369  0.175
## mean_value_of_short_term_variability                    0.819 -0.242
## percentage_of_time_with_abnormal_long_term_variability -0.580  0.209
## mean_value_of_long_term_variability                                 
## histogram_width                                         0.921       
## histogram_max                                           0.654  0.461
## histogram_number_of_peaks                               0.699       
## histogram_mode                                         -0.144  0.914
## histogram_variance                                      0.794       
## histogram_tendency                                             0.311
## 
##                  MR1   MR2
## SS loadings    4.165 2.031
## Proportion Var 0.278 0.135
## Cumulative Var 0.278 0.413

# Visualization: Factor Loadings Plot
fa_loadings <- as.data.frame(fa_model$loadings[, 1:2])
fa_loadings$Variable <- rownames(fa_loadings)
fa_loadings_long <- pivot_longer(fa_loadings, cols = c("MR1", "MR2"), names_to = "Factor", values_to = "Loading")
p_fa <- ggplot(fa_loadings_long, aes(x = Variable, y = Loading, fill = Factor)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "E2 - Factor Loadings (Without SMOTE)", x = "Variable", y = "Loading") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p_fa)

# Hitung eigenvalue dari matriks korelasi
eigen_vals <- eigen(cor(X_smote))$values
num_factors <- sum(eigen_vals > 1)
cat("Jumlah faktor dengan eigenvalue > 1 (Kaiser Rule):", num_factors, "\n")

## Jumlah faktor dengan eigenvalue > 1 (Kaiser Rule): 5

# Jalankan FA dengan jumlah faktor optimal
fa_model_smote <- fa(X_smote, nfactors = num_factors, rotate = "varimax")
X_smote_fa <- fa_model_smote$scores

cat("\nF2 - Factor Loadings (With SMOTE):\n")

## 
## F2 - Factor Loadings (With SMOTE):

print(fa_model_smote$loadings)

## 
## Loadings:
##                                                        MR1    MR2    MR3   
## baseline.value                                         -0.187  0.810 -0.177
## accelerations                                                         0.545
## fetal_movement                                                             
## uterine_contractions                                    0.398         0.247
## light_decelerations                                     0.746 -0.253 -0.126
## abnormal_short_term_variability                        -0.234  0.166 -0.674
## mean_value_of_short_term_variability                    0.779 -0.266  0.270
## percentage_of_time_with_abnormal_long_term_variability -0.531  0.335 -0.515
## mean_value_of_long_term_variability                    -0.122         0.317
## histogram_width                                         0.923         0.246
## histogram_max                                           0.641  0.506  0.465
## histogram_mode                                         -0.432  0.788  0.115
## histogram_variance                                      0.799 -0.196       
## histogram_tendency                                     -0.152  0.249 -0.128
##                                                        MR5    MR4   
## baseline.value                                          0.150       
## accelerations                                                       
## fetal_movement                                                -0.410
## uterine_contractions                                           0.696
## light_decelerations                                    -0.164  0.276
## abnormal_short_term_variability                        -0.323 -0.194
## mean_value_of_short_term_variability                   -0.108  0.241
## percentage_of_time_with_abnormal_long_term_variability        -0.246
## mean_value_of_long_term_variability                     0.538 -0.165
## histogram_width                                                     
## histogram_max                                          -0.311 -0.121
## histogram_mode                                          0.376       
## histogram_variance                                     -0.202  0.131
## histogram_tendency                                      0.652       
## 
##                  MR1   MR2   MR3   MR5   MR4
## SS loadings    3.822 1.918 1.612 1.170 0.960
## Proportion Var 0.273 0.137 0.115 0.084 0.069
## Cumulative Var 0.273 0.410 0.525 0.609 0.677

# Visualization: Factor Loadings Plot
fa_loadings_smote <- as.data.frame(fa_model_smote$loadings[, 1:2])
fa_loadings_smote$Variable <- rownames(fa_loadings_smote)
fa_loadings_smote_long <- pivot_longer(fa_loadings_smote, cols = c("MR1", "MR2"), names_to = "Factor", values_to = "Loading")
p_fa_smote <- ggplot(fa_loadings_smote_long, aes(x = Variable, y = Loading, fill = Factor)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "F2 - Factor Loadings (With SMOTE)", x = "Variable", y = "Loading") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p_fa_smote)

library(ggplot2)
library(dplyr)
library(psych)

fa_scores <- as.data.frame(fa_model_smote$scores)
colnames(fa_scores) <- c("F1", "F2")
fa_scores$label <- y_smote

fa_loadings <- as.data.frame(fa_model_smote$loadings[, 1:2])
fa_loadings$Variable <- rownames(fa_loadings)
colnames(fa_loadings) <- c("F1_loading", "F2_loading", "Variable")

scale_factor <- 3
fa_loadings <- fa_loadings %>%
  mutate(F1s = F1_loading * scale_factor,
         F2s = F2_loading * scale_factor)

# Plot
p_fa_biplot <- ggplot(fa_scores, aes(x = F1, y = F2, color = label)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_segment(data = fa_loadings, aes(x = 0, y = 0, xend = F1s, yend = F2s),
               arrow = arrow(length = unit(0.3, "cm")), color = "black") +
  geom_text(data = fa_loadings, aes(x = F1s, y = F2s, label = Variable),
            color = "black", vjust = -0.5) +
  # Tambah correlation circle
  annotate("path",
           x = scale_factor * cos(seq(0, 2 * pi, length.out = 100)),
           y = scale_factor * sin(seq(0, 2 * pi, length.out = 100)),
           color = "blue", linetype = "dashed", alpha = 0.5) +
  labs(title = "G1 - Factor Analysis Biplot (With SMOTE)",
       x = "Factor 1", y = "Factor 2") +
  theme_minimal() +
  theme(legend.title = element_blank())

print(p_fa_biplot)

Pemisahan Dataset

# --- LDA and MLR ---
# Define datasets
# D1: PCA + FA without SMOTE
fa_pca_model <- fa(X_pca, nfactors = 2, rotate = "varimax")

## In smc, smcs < 0 were set to .0
## In smc, smcs < 0 were set to .0
## In smc, smcs < 0 were set to .0

X_pca_fa <- fa_pca_model$scores

# H1: PCA + FA with SMOTE
fa_smote_pca_model <- fa(X_smote_pca, nfactors = 2, rotate = "varimax")
X_smote_pca_fa <- fa_smote_pca_model$scores


# Prepare datasets
datasets <- list(
  A1 = X_scaled,       # Scaled original data
  B1 = X_pca,          # PCA without SMOTE
  C1 = X_fa,           # FA without SMOTE
  D1 = X_pca_fa,       # PCA + FA without SMOTE
  E1 = X_smote_scaled, # Scaled SMOTE data
  F1 = X_smote_pca,    # PCA with SMOTE
  G1 = X_smote_fa,     # FA with SMOTE
  H1 = X_smote_pca_fa  # PCA + FA with SMOTE
)

targets <- list(
  A1 = data_winsorized$fetal_health,
  B1 = data_winsorized$fetal_health,
  C1 = data_winsorized$fetal_health,
  D1 = data_winsorized$fetal_health,
  E1 = y_smote,
  F1 = y_smote,
  G1 = y_smote,
  H1 = y_smote
)

labels_lda <- list(
  A1 = "AA1",
  B1 = "BB1",
  C1 = "CC1",
  D1 = "DD1",
  E1 = "EE1",
  F1 = "FF1",
  G1 = "GG1",
  H1 = "HH1"
)

labels_mlr <- list(
  A1 = "AB1",
  B1 = "BC1",
  C1 = "CD1",
  D1 = "DE1",
  E1 = "EF1",
  F1 = "FG1",
  G1 = "GH1",
  H1 = "HI1"
)

# Initialize lists to store accuracies for comparison
lda_accuracies <- list()
mlr_accuracies <- list()

Uji asumsi, pemodelan, uji signifikansi

# Mardia's Test for Multivariate Normality
# Fungsi untuk menghitung Mardia's Skewness dan Kurtosis dengan pengecekan error
# Fungsi untuk menghitung Mardia's Skewness dan Kurtosis tanpa tryCatch
mardia_test_manual <- function(X) {
  if (!is.matrix(X)) {
    stop("Input harus berupa matriks numerik.")
  }
  if (ncol(X) < 2) {
    stop("Matriks harus memiliki minimal 2 kolom (variabel).")
  }
  if (nrow(X) <= ncol(X)) {
    stop("Jumlah observasi harus lebih banyak dari jumlah variabel.")
  }
  
  n <- nrow(X)
  p <- ncol(X)
  
  # Hitung mean dan covariance matrix
  mean_vec <- colMeans(X)
  S <- cov(X)
  S_inv <- solve(S)
  
  # Hitung deviasi setiap observasi dari mean
  dev <- sweep(X, 2, mean_vec)
  
  # Hitung Mahalanobis distances untuk setiap observasi
  D <- apply(dev, 1, function(x) t(x) %*% S_inv %*% x)
  
  # Mardia's multivariate skewness
  skewness <- (1 / (n^2)) * sum(sapply(1:n, function(i) {
    sapply(1:n, function(j) {
      t(dev[i,]) %*% S_inv %*% dev[j,] %*% t(dev[j,]) %*% S_inv %*% dev[i,]
    }) %>% sum()
  }))
  
  # Alternatif perhitungan lebih efisien:
  # skewness <- (1 / n^2) * sum((t(dev) %*% S_inv %*% dev)^3)
  # Namun, untuk kejelasan, kita gunakan loop
  
  # Mardia's multivariate kurtosis
  kurtosis <- mean(D^2)
  
  # Statistik uji dan p-value untuk skewness (approximate)
  k <- p * (p + 1) * (p + 2) / 6
  skew_stat <- n * skewness / 6
  p_value_skew <- 1 - pchisq(skew_stat, df = k)
  
  # Statistik uji dan p-value untuk kurtosis (approximate)
  expected_kurtosis <- p * (p + 2)
  z_kurtosis <- (kurtosis - expected_kurtosis) / sqrt(8 * p * (p + 2) / n)
  p_value_kurt <- 2 * (1 - pnorm(abs(z_kurtosis)))
  
  # Return hasil
  list(
    skewness = skewness,
    skewness_stat = skew_stat,
    p_value_skewness = p_value_skew,
    kurtosis = kurtosis,
    kurtosis_z = z_kurtosis,
    p_value_kurtosis = p_value_kurt
  )
}

# Inisialisasi list untuk menyimpan akurasi LDA
lda_accuracies <- list()

# Loop analisis untuk setiap dataset
for (key in names(datasets)) {
  X_data <- datasets[[key]]
  y_data <- targets[[key]]
  label <- labels_lda[[key]]
  
  X_data <- as.matrix(X_data)
  
  # Cek jumlah observasi dan kelas
  if (nrow(X_data) < length(unique(y_data)) || any(table(y_data) < 2)) {
    cat(sprintf("\n%s - LDA Skipped: Insufficient observations per class\n", label))
    next
  }
  
  # Mardia's Test
  cat(sprintf("\n%s - Mardia's Test (Skewness, p-value, Kurtosis):\n", label))
  mardia_res <- tryCatch(mardia_test_manual(X_data), error = function(e) {
    cat("Mardia's test gagal: ", e$message, "\n")
    return(list(skewness = NA, p_value_skewness = NA, kurtosis = NA))
  })
  
  if (any(is.na(unlist(mardia_res)))) {
    cat("Mardia's test gagal dijalankan (data mungkin tidak memenuhi syarat).\n")
  } else {
    cat(sprintf("Skewness: %.4f\n", mardia_res$skewness))
    cat(sprintf("P-value Skewness: %.4f\n", mardia_res$p_value_skewness))
    cat(sprintf("Kurtosis: %.4f\n", mardia_res$kurtosis))
    if (mardia_res$p_value_skewness < 0.05) {
      cat("Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).\n")
    } else {
      cat("Kesimpulan: Data mengikuti distribusi normal multivariat (tidak signifikan).\n")
    }
  }
  
  # Box's M Test untuk kesamaan kovarians antar grup
  boxm_result <- tryCatch(biotools::boxM(X_data, y_data), error = function(e) {
    cat(sprintf("\n%s - Box's M Test Failed: %s\n", label, e$message))
    return(NULL)
  })
  if (!is.null(boxm_result)) {
    cat(sprintf("\n%s - Box's M Test:\n", label))
    print(boxm_result)
  }
  
  # Model LDA
  lda_model <- tryCatch(MASS::lda(X_data, grouping = y_data, tol = 1e-6), error = function(e) {
    cat(sprintf("\n%s - LDA Failed: %s\n", label, e$message))
    return(NULL)
  })
  
  if (!is.null(lda_model)) {
    # Wilks' Lambda (aproksimasi)
    svd_vals <- lda_model$svd^2
    wilks_lambda <- prod(1 / (1 + svd_vals))
    cat(sprintf("\n%s - Wilks' Lambda (Approximated):\n", label))
    print(wilks_lambda)
    
    # Koefisien diskriminan
    cat(sprintf("\n%s - LDA Discriminant Coefficients:\n", label))
    print(lda_model$scaling)
    
    # Uji signifikansi variabel dengan ANOVA
    cat(sprintf("\n%s - Significance Test for Variables (ANOVA):\n", label))
    anova_data <- as.data.frame(X_data)
    anova_data$Group <- y_data
    anova_results <- lapply(colnames(X_data), function(var) {
      formula <- as.formula(paste(var, "~ Group"))
      anova_model <- tryCatch(aov(formula, data = anova_data), error = function(e) NULL)
      if (!is.null(anova_model)) {
        summary_anova <- summary(anova_model)
        p_value <- summary_anova[[1]]$`Pr(>F)`[1]
        f_value <- summary_anova[[1]]$`F value`[1]
        return(data.frame(Variable = var, F_Value = f_value, P_Value = p_value))
      } else {
        return(data.frame(Variable = var, F_Value = NA, P_Value = NA))
      }
    })
    anova_results_df <- do.call(rbind, anova_results)
    print(anova_results_df)
    
    # Visualisasi koefisien diskriminan yang distandarisasi
    std_coefs <- lda_model$scaling
    var_sds <- apply(X_data, 2, sd)
    std_coefs <- sweep(std_coefs, 1, var_sds, "*")
    coefs_df <- as.data.frame(std_coefs)
    coefs_df$Variable <- rownames(coefs_df)
    coefs_df <- tidyr::pivot_longer(coefs_df, cols = starts_with("LD"), names_to = "Discriminant", values_to = "Coefficient")
    coefs_df <- merge(coefs_df, anova_results_df[, c("Variable", "P_Value")], by = "Variable", all.x = TRUE)
    coefs_df$Significance <- ifelse(coefs_df$P_Value < 0.05, "*", "")
    
    p_lda <- ggplot(coefs_df, aes(x = Variable, y = Coefficient, fill = Discriminant)) +
      geom_bar(stat = "identity", position = "dodge") +
      geom_text(aes(label = Significance), vjust = -0.5, position = position_dodge(width = 0.45)) +
      labs(title = paste("Standardized LDA Coefficients (", label, ")"), x = "Variable", y = "Standardized Coefficient") +
      theme_minimal() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))
    print(p_lda)
    
    # Evaluasi LDA dengan confusion matrix dan akurasi
    y_pred <- predict(lda_model, X_data)$class
    cm <- table(y_data, y_pred)
    cat(sprintf("\n%s - Confusion Matrix:\n", label))
    print(cm)
    
    cm_data <- as.data.frame.matrix(cm)
    cm_data$Actual <- rownames(cm_data)
    cm_data <- tidyr::pivot_longer(cm_data, cols = colnames(cm), names_to = "Predicted", values_to = "Count")
    p_cm <- ggplot(cm_data, aes(x = Predicted, y = Actual, fill = Count)) +
      geom_tile() +
      geom_text(aes(label = Count), color = "white") +
      scale_fill_gradient(low = "#f7f7f7", high = "#4e79a7") +
      labs(title = paste("Confusion Matrix for", label, "(LDA)"), x = "Predicted", y = "Actual") +
      theme_minimal()
    print(p_cm)
    
    acc <- mean(y_data == y_pred)
    cat(sprintf("\n%s - Accuracy:\n", label))
    print(acc)
    
    lda_accuracies[[label]] <- acc
  }
}

## 
## AA1 - Mardia's Test (Skewness, p-value, Kurtosis):
## Skewness: 14.9858
## P-value Skewness: 0.0000
## Kurtosis: 297.0291
## Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).
## 
## AA1 - Box's M Test:
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X_data
## Chi-Sq (approx.) = 3493.5, df = 240, p-value < 2.2e-16
## 
## 
## AA1 - Wilks' Lambda (Approximated):
## [1] 2.364257e-06
## 
## AA1 - LDA Discriminant Coefficients:
##                                                                 LD1        LD2
## baseline.value                                          0.701426821  0.4162142
## accelerations                                          -0.416189367  0.2419189
## fetal_movement                                         -0.006382276 -0.1917747
## uterine_contractions                                   -0.257259460  0.0582607
## light_decelerations                                    -0.507157913 -0.2617604
## abnormal_short_term_variability                         0.502898143  0.1288263
## mean_value_of_short_term_variability                    0.094254326  0.2534899
## percentage_of_time_with_abnormal_long_term_variability  0.401801596 -0.5154852
## mean_value_of_long_term_variability                    -0.226844625 -0.2535886
## histogram_width                                        -0.293525982  0.2534092
## histogram_max                                           0.273431666 -0.4216371
## histogram_number_of_peaks                               0.052213461 -0.1150758
## histogram_mode                                         -1.013979563 -0.8182263
## histogram_variance                                      0.771339899  0.3846071
## histogram_tendency                                      0.069021985 -0.1346064
## 
## AA1 - Significance Test for Variables (ANOVA):
##                                                  Variable    F_Value
## 1                                          baseline.value 137.603512
## 2                                           accelerations 195.971542
## 3                                          fetal_movement  10.066204
## 4                                    uterine_contractions  92.535685
## 5                                     light_decelerations  62.601111
## 6                         abnormal_short_term_variability 334.558940
## 7                    mean_value_of_short_term_variability 153.668687
## 8  percentage_of_time_with_abnormal_long_term_variability 389.185482
## 9                     mean_value_of_long_term_variability  79.350509
## 10                                        histogram_width  53.943891
## 11                                          histogram_max   2.256021
## 12                              histogram_number_of_peaks  12.087430
## 13                                         histogram_mode 214.056236
## 14                                     histogram_variance 100.580770
## 15                                     histogram_tendency  44.820861
##          P_Value
## 1   6.827411e-57
## 2   8.856190e-79
## 3   4.457249e-05
## 4   3.026880e-39
## 5   3.891911e-27
## 6  6.703609e-127
## 7   5.068339e-63
## 8  1.471757e-144
## 9   5.959481e-34
## 10  1.421706e-23
## 11  1.050197e-01
## 12  6.031104e-06
## 13  2.360489e-85
## 14  1.907755e-42
## 15  8.658192e-20

## 
## AA1 - Confusion Matrix:
##       y_pred
## y_data    1    2    3
##      1 1529   95   21
##      2   88  195    8
##      3   18   56   99

## 
## AA1 - Accuracy:
## [1] 0.8643907
## 
## BB1 - Mardia's Test (Skewness, p-value, Kurtosis):
## Skewness: 4.9953
## P-value Skewness: 0.0000
## Kurtosis: 32.5453
## Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).
## 
## BB1 - Box's M Test:
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X_data
## Chi-Sq (approx.) = 706.61, df = 30, p-value < 2.2e-16
## 
## 
## BB1 - Wilks' Lambda (Approximated):
## [1] 6.242841e-06
## 
## BB1 - LDA Discriminant Coefficients:
##            LD1        LD2
## PC1 0.27046361 -0.3008567
## PC2 0.10859520 -0.4557624
## PC3 0.84311202  0.4072280
## PC4 0.08814101 -0.1067632
## PC5 0.18607476 -0.2206565
## 
## BB1 - Significance Test for Variables (ANOVA):
##   Variable    F_Value       P_Value
## 1      PC1 179.958069  7.055170e-73
## 2      PC2  95.742432  1.594449e-40
## 3      PC3 475.030478 5.321832e-171
## 4      PC4   5.250309  5.314781e-03
## 5      PC5  21.533755  5.525210e-10

## 
## BB1 - Confusion Matrix:
##       y_pred
## y_data    1    2    3
##      1 1552   41   52
##      2  114  173    4
##      3   40   54   79

## 
## BB1 - Accuracy:
## [1] 0.8553817
## 
## CC1 - Mardia's Test (Skewness, p-value, Kurtosis):
## Skewness: 1.9981
## P-value Skewness: 0.0000
## Kurtosis: 7.1346
## Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).
## 
## CC1 - Box's M Test:
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X_data
## Chi-Sq (approx.) = 226.36, df = 6, p-value < 2.2e-16
## 
## 
## CC1 - Wilks' Lambda (Approximated):
## [1] 0.0001647984
## 
## CC1 - LDA Discriminant Coefficients:
##            LD1       LD2
## MR1  0.7786038 0.7611211
## MR2 -0.8629648 0.6765822
## 
## CC1 - Significance Test for Variables (ANOVA):
##   Variable  F_Value      P_Value
## 1      MR1 117.0307 6.388077e-49
## 2      MR2 144.2239 1.987955e-59

## 
## CC1 - Confusion Matrix:
##       y_pred
## y_data    1    2    3
##      1 1626   19    0
##      2  252   39    0
##      3  157   16    0

## 
## CC1 - Accuracy:
## [1] 0.7894737
## 
## DD1 - Mardia's Test (Skewness, p-value, Kurtosis):
## Skewness: 1.9981
## P-value Skewness: 0.0000
## Kurtosis: 7.7626
## Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).
## 
## DD1 - Box's M Test:
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X_data
## Chi-Sq (approx.) = 93.262, df = 6, p-value < 2.2e-16
## 
## 
## DD1 - LDA Failed: variables 1 2 appear to be constant within groups
## 
## EE1 - Mardia's Test (Skewness, p-value, Kurtosis):
## Skewness: 13.9867
## P-value Skewness: 0.0000
## Kurtosis: 258.0314
## Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).
## 
## EE1 - Box's M Test:
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X_data
## Chi-Sq (approx.) = 8584.6, df = 210, p-value < 2.2e-16
## 
## 
## EE1 - Wilks' Lambda (Approximated):
## [1] 5.766711e-07
## 
## EE1 - LDA Discriminant Coefficients:
##                                                                LD1          LD2
## baseline.value                                          0.43111518  0.001588134
## accelerations                                          -0.66080101  0.527655774
## fetal_movement                                         -0.05063304 -0.145927114
## uterine_contractions                                   -0.25627467 -0.020740375
## light_decelerations                                    -0.46759642  0.021077773
## abnormal_short_term_variability                         0.77571867  0.061172600
## mean_value_of_short_term_variability                    0.45442796  0.405646249
## percentage_of_time_with_abnormal_long_term_variability  0.32422327 -0.404875593
## mean_value_of_long_term_variability                    -0.31085939 -0.163255805
## histogram_width                                        -0.26402935 -0.035580897
## histogram_max                                           0.20068766 -0.480803248
## histogram_mode                                         -0.88362263 -0.481210420
## histogram_variance                                      0.77604189  0.090063147
## histogram_tendency                                      0.01862363 -0.104061863
## 
## EE1 - Significance Test for Variables (ANOVA):
##                                                  Variable    F_Value
## 1                                          baseline.value 304.228800
## 2                                           accelerations 551.961113
## 3                                          fetal_movement  22.183989
## 4                                    uterine_contractions 124.711906
## 5                                     light_decelerations 194.941513
## 6                         abnormal_short_term_variability 561.352273
## 7                    mean_value_of_short_term_variability 274.366100
## 8  percentage_of_time_with_abnormal_long_term_variability 425.063606
## 9                     mean_value_of_long_term_variability 321.388461
## 10                                        histogram_width  93.642562
## 11                                          histogram_max   2.452833
## 12                                         histogram_mode 639.101591
## 13                                     histogram_variance 271.488996
## 14                                     histogram_tendency 158.487670
##          P_Value
## 1  8.706348e-117
## 2  1.904524e-193
## 3   2.922438e-10
## 4   6.520038e-52
## 5   2.125750e-78
## 6  4.093910e-196
## 7  1.297908e-106
## 8  8.737857e-156
## 9  1.566161e-122
## 10  1.096720e-39
## 11  8.629535e-02
## 12 1.273593e-217
## 13 1.274852e-105
## 14  7.659638e-65

## 
## EE1 - Confusion Matrix:
##       y_pred
## y_data   1   2   3
##      1 586  93  24
##      2  43 636  24
##      3   0 167 535

## 
## EE1 - Accuracy:
## [1] 0.8334915
## 
## FF1 - Mardia's Test (Skewness, p-value, Kurtosis):
## Skewness: 4.9953
## P-value Skewness: 0.0000
## Kurtosis: 34.4231
## Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).
## 
## FF1 - Box's M Test:
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X_data
## Chi-Sq (approx.) = 2117.2, df = 30, p-value < 2.2e-16
## 
## 
## FF1 - Wilks' Lambda (Approximated):
## [1] 1.052511e-06
## 
## FF1 - LDA Discriminant Coefficients:
##            LD1         LD2
## PC1 -0.0115421  0.44923823
## PC2  0.8763763  0.28876662
## PC3  0.6843459 -0.39018593
## PC4  0.1266701 -0.01265415
## PC5 -0.3615343  0.31421408
## 
## FF1 - Significance Test for Variables (ANOVA):
##   Variable   F_Value       P_Value
## 1      PC1 364.90299 8.750254e-137
## 2      PC2 795.97192 3.700256e-258
## 3      PC3 285.59847 1.819984e-110
## 4      PC4   4.89484  7.570521e-03
## 5      PC5  63.36136  1.901023e-27

## 
## FF1 - Confusion Matrix:
##       y_pred
## y_data   1   2   3
##      1 540  88  75
##      2  56 605  42
##      3  18 159 525

## 
## FF1 - Accuracy:
## [1] 0.7922201
## 
## GG1 - Mardia's Test (Skewness, p-value, Kurtosis):
## Skewness: 4.9953
## P-value Skewness: 0.0000
## Kurtosis: 33.2906
## Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).
## 
## GG1 - Box's M Test:
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X_data
## Chi-Sq (approx.) = 1604.4, df = 30, p-value < 2.2e-16
## 
## 
## GG1 - Wilks' Lambda (Approximated):
## [1] 1.356584e-06
## 
## GG1 - LDA Discriminant Coefficients:
##            LD1        LD2
## MR1  0.4499719  0.4740041
## MR2 -0.1359885 -0.9574111
## MR3 -1.2109584  0.4616812
## MR5 -0.8507201 -0.2687832
## MR4 -0.3372380  0.5033044
## 
## GG1 - Significance Test for Variables (ANOVA):
##   Variable   F_Value       P_Value
## 1      MR1 120.10987  4.024017e-50
## 2      MR2 283.01971 1.386185e-109
## 3      MR3 511.99030 6.476115e-182
## 4      MR5 225.16611  2.423701e-89
## 5      MR4  92.78798  2.404584e-39

## 
## GG1 - Confusion Matrix:
##       y_pred
## y_data   1   2   3
##      1 555  96  52
##      2  92 565  46
##      3  37 166 499

## 
## GG1 - Accuracy:
## [1] 0.7680266
## 
## HH1 - Mardia's Test (Skewness, p-value, Kurtosis):
## Skewness: 1.9981
## P-value Skewness: 0.0000
## Kurtosis: 8.7146
## Kesimpulan: Data tidak mengikuti distribusi normal multivariat (signifikan).
## 
## HH1 - Box's M Test:
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X_data
## Chi-Sq (approx.) = 687.39, df = 6, p-value < 2.2e-16
## 
## 
## HH1 - LDA Failed: variables 1 2 appear to be constant within groups

library(car)

## Warning: package 'car' was built under R version 4.4.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.4.3

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

## The following object is masked from 'package:dplyr':
## 
##     recode

# MLR Model
for (key in names(datasets)) {
  X_data <- as.data.frame(datasets[[key]])
  y_data <- targets[[key]]
  label <- labels_mlr[[key]]
  
  cat(sprintf("\n========== %s ==========\n", label))
  
  # Hitung VIF dengan membuat model linier dummy
  temp_data <- X_data
  temp_data$dummy_y <- rnorm(nrow(temp_data))  # dummy target
  vif_model <- lm(dummy_y ~ ., data = temp_data)
  vif_values <- vif(vif_model)
  cat(">>> VIF (Variance Inflation Factor):\n")
  print(round(vif_values, 3))
  
  # Cek cukupnya data
  if (nrow(X_data) < length(unique(y_data)) || any(table(y_data) < 2)) {
    cat("Skipped: Tidak cukup data untuk semua kelas.\n")
    next
  }
  
  data_mlr <- cbind(X_data, y = y_data)
  
  # MLR Model
  mlr_model <- tryCatch(nnet::multinom(y ~ ., data = data_mlr, trace = FALSE, maxit = 1000), error = function(e) {
    cat(sprintf("\n%s - MLR Failed: %s\n", label, e$message))
    return(NULL)
  })
  
  if (!is.null(mlr_model)) {
    # Likelihood Ratio Test
    null_model <- tryCatch(nnet::multinom(y ~ 1, data = data_mlr, trace = FALSE, maxit = 1000), error = function(e) NULL)
    if (!is.null(null_model)) {
      loglik_full <- logLik(mlr_model)
      loglik_null <- logLik(null_model)
      lrt_stat <- 2 * (loglik_full - loglik_null)
      df <- length(coef(mlr_model)) - length(coef(null_model))
      lrt_pvalue <- pchisq(lrt_stat, df, lower.tail = FALSE)
      
      cat(sprintf("\n%s - Likelihood Ratio Test (Manual Calculation):\n", label))
      cat(sprintf("LRT Statistic: %.3f, df: %d, p-value: %.3e\n", lrt_stat, df, lrt_pvalue))
    } else {
      cat(sprintf("\n%s - Likelihood Ratio Test Skipped: Null model failed\n", label))
    }
    
    # Wald Test
    cat(sprintf("\n%s - Wald Test (Coefficients and p-values):\n", label))
    print(summary(mlr_model))
    
    # Confusion Matrix
    y_pred <- predict(mlr_model, X_data, type = "class")
    cm <- table(y_data, y_pred)
    cat(sprintf("\n%s - Confusion Matrix:\n", label))
    print(cm)
    
    cm_data <- as.data.frame.matrix(cm)
    cm_data$Actual <- rownames(cm_data)
    cm_data <- tidyr::pivot_longer(cm_data, cols = c("1", "2", "3"), names_to = "Predicted", values_to = "Count")
    p_cm_mlr <- ggplot(cm_data, aes(x = Predicted, y = Actual, fill = Count)) +
      geom_tile() +
      geom_text(aes(label = Count), color = "white") +
      scale_fill_gradient(low = "#f7f7f7", high = "#f28e2b") +
      labs(title = paste("Confusion Matrix for", label, "(MLR)"), x = "Predicted", y = "Actual") +
      theme_minimal()
    print(p_cm_mlr)
    
    # Odds Ratios
    odds_ratios <- exp(coef(mlr_model))
    cat(sprintf("\n%s - Odds Ratios:\n", label))
    print(odds_ratios)
    
    acc <- mean(y_data == y_pred)
    cat(sprintf("\n%s - Accuracy:\n", label))
    print(acc)
    
    mlr_accuracies[[label]] <- acc
  }
}

## 
## ========== AB1 ==========
## >>> VIF (Variance Inflation Factor):
##                                         baseline.value 
##                                                  4.063 
##                                          accelerations 
##                                                  2.567 
##                                         fetal_movement 
##                                                  1.269 
##                                   uterine_contractions 
##                                                  1.395 
##                                    light_decelerations 
##                                                  3.045 
##                        abnormal_short_term_variability 
##                                                  1.859 
##                   mean_value_of_short_term_variability 
##                                                  3.117 
## percentage_of_time_with_abnormal_long_term_variability 
##                                                  2.074 
##                    mean_value_of_long_term_variability 
##                                                  1.851 
##                                        histogram_width 
##                                                  9.006 
##                                          histogram_max 
##                                                  6.991 
##                              histogram_number_of_peaks 
##                                                  2.386 
##                                         histogram_mode 
##                                                  5.100 
##                                     histogram_variance 
##                                                  3.002 
##                                     histogram_tendency 
##                                                  2.714 
## 
## AB1 - Likelihood Ratio Test (Manual Calculation):
## LRT Statistic: 1755.499, df: 30, p-value: 0.000e+00
## 
## AB1 - Wald Test (Coefficients and p-values):
## Call:
## nnet::multinom(formula = y ~ ., data = data_mlr, trace = FALSE, 
##     maxit = 1000)
## 
## Coefficients:
##   (Intercept) baseline.value accelerations fetal_movement uterine_contractions
## 2   -4.535360     -0.1546641     -3.327839     0.38686473           -0.5213949
## 3   -9.027453      1.3305182     -5.608926     0.04413364           -0.9672625
##   light_decelerations abnormal_short_term_variability
## 2          -0.9960602                       0.8520202
## 3          -1.1044701                       2.4437219
##   mean_value_of_short_term_variability
## 2                          -0.91154625
## 3                          -0.05280065
##   percentage_of_time_with_abnormal_long_term_variability
## 2                                              0.4828328
## 3                                              0.2118075
##   mean_value_of_long_term_variability histogram_width histogram_max
## 2                         -0.01137033      -0.7768076      1.286825
## 3                         -0.88491386      -0.7132121      1.036181
##   histogram_number_of_peaks histogram_mode histogram_variance
## 2                 0.2994720      0.3915495           1.454514
## 3                -0.1510739     -2.8150510           2.266049
##   histogram_tendency
## 2          0.2791977
## 3          0.3425407
## 
## Std. Errors:
##   (Intercept) baseline.value accelerations fetal_movement uterine_contractions
## 2   0.2996099      0.2577133     0.3854525      0.1104507            0.1212088
## 3   0.8989512      0.3122373     0.9893373      0.1649483            0.1864061
##   light_decelerations abnormal_short_term_variability
## 2           0.2717582                       0.1709663
## 3           0.2648024                       0.2864991
##   mean_value_of_short_term_variability
## 2                            0.2650570
## 3                            0.2981736
##   percentage_of_time_with_abnormal_long_term_variability
## 2                                              0.1163989
## 3                                              0.2258749
##   mean_value_of_long_term_variability histogram_width histogram_max
## 2                           0.1944055       0.2667072     0.2709873
## 3                           0.3275101       0.4426077     0.3714317
##   histogram_number_of_peaks histogram_mode histogram_variance
## 2                 0.1499308      0.3339761          0.2505191
## 3                 0.2538637      0.3538485          0.2681127
##   histogram_tendency
## 2          0.1507782
## 3          0.2053751
## 
## Residual Deviance: 1079.949 
## AIC: 1143.949 
## 
## AB1 - Confusion Matrix:
##       y_pred
## y_data    1    2    3
##      1 1575   53   17
##      2   83  193   15
##      3   20   35  118

## 
## AB1 - Odds Ratios:
##    (Intercept) baseline.value accelerations fetal_movement uterine_contractions
## 2 0.0107230482      0.8567029   0.035870535       1.472357            0.5936919
## 3 0.0001200679      3.7830032   0.003665004       1.045122            0.3801222
##   light_decelerations abnormal_short_term_variability
## 2           0.3693317                        2.344378
## 3           0.3313864                       11.515822
##   mean_value_of_short_term_variability
## 2                            0.4019023
## 3                            0.9485691
##   percentage_of_time_with_abnormal_long_term_variability
## 2                                               1.620659
## 3                                               1.235910
##   mean_value_of_long_term_variability histogram_width histogram_max
## 2                           0.9886941       0.4598717      3.621272
## 3                           0.4127497       0.4900675      2.818432
##   histogram_number_of_peaks histogram_mode histogram_variance
## 2                 1.3491462     1.47927114           4.282403
## 3                 0.8597841     0.05990166           9.641231
##   histogram_tendency
## 2           1.322069
## 3           1.408522
## 
## AB1 - Accuracy:
## [1] 0.8942627
## 
## ========== BC1 ==========
## >>> VIF (Variance Inflation Factor):
## PC1 PC2 PC3 PC4 PC5 
##   1   1   1   1   1 
## 
## BC1 - Likelihood Ratio Test (Manual Calculation):
## LRT Statistic: 1305.404, df: 10, p-value: 2.609e-274
## 
## BC1 - Wald Test (Coefficients and p-values):
## Call:
## nnet::multinom(formula = y ~ ., data = data_mlr, trace = FALSE, 
##     maxit = 1000)
## 
## Coefficients:
##   (Intercept)       PC1       PC2      PC3        PC4       PC5
## 2   -3.008824 0.6864185  0.675013 1.027433 0.24137535 0.5220185
## 3   -4.687199 0.2899734 -0.308525 2.384946 0.05931237 0.3202953
## 
## Std. Errors:
##   (Intercept)        PC1        PC2        PC3        PC4        PC5
## 2   0.1376679 0.04900339 0.06917199 0.09464289 0.07002781 0.08670944
## 3   0.2830351 0.04617997 0.08537680 0.16078922 0.10040872 0.12508875
## 
## Residual Deviance: 1530.045 
## AIC: 1554.045 
## 
## BC1 - Confusion Matrix:
##       y_pred
## y_data    1    2    3
##      1 1570   34   41
##      2  120  161   10
##      3   50   41   82

## 
## BC1 - Odds Ratios:
##   (Intercept)      PC1       PC2       PC3      PC4      PC5
## 2 0.049349661 1.986588 1.9640586  2.793885 1.272999 1.685426
## 3 0.009212457 1.336392 0.7345296 10.858480 1.061107 1.377534
## 
## BC1 - Accuracy:
## [1] 0.8596491
## 
## ========== CD1 ==========
## >>> VIF (Variance Inflation Factor):
##   MR1   MR2 
## 1.001 1.001 
## 
## CD1 - Likelihood Ratio Test (Manual Calculation):
## LRT Statistic: 578.244, df: 4, p-value: 7.915e-124
## 
## CD1 - Wald Test (Coefficients and p-values):
## Call:
## nnet::multinom(formula = y ~ ., data = data_mlr, trace = FALSE, 
##     maxit = 1000)
## 
## Coefficients:
##   (Intercept)        MR1        MR2
## 2   -2.730659 -1.4724532  1.2206809
## 3   -2.678121  0.1397596 -0.9330897
## 
## Std. Errors:
##   (Intercept)        MR1        MR2
## 2   0.1219215 0.10288603 0.09833167
## 3   0.1087513 0.08745025 0.09424308
## 
## Residual Deviance: 2257.204 
## AIC: 2269.204 
## 
## CD1 - Confusion Matrix:
##       y_pred
## y_data    1    2    3
##      1 1607   38    0
##      2  206   85    0
##      3  150   23    0

## 
## CD1 - Odds Ratios:
##   (Intercept)       MR1       MR2
## 2  0.06517632 0.2293621 3.3894947
## 3  0.06869213 1.1499973 0.3933366
## 
## CD1 - Accuracy:
## [1] 0.802276
## 
## ========== DE1 ==========
## >>> VIF (Variance Inflation Factor):
##   MR1   MR2 
## 1.134 1.134 
## 
## DE1 - Likelihood Ratio Test (Manual Calculation):
## LRT Statistic: 0.000, df: 4, p-value: 1.000e+00
## 
## DE1 - Wald Test (Coefficients and p-values):
## Call:
## nnet::multinom(formula = y ~ ., data = data_mlr, trace = FALSE, 
##     maxit = 1000)
## 
## Coefficients:
##   (Intercept)          MR1          MR2
## 2   -1.732172 4.225877e-06 2.641869e-05
## 3   -2.252204 1.486263e-05 1.722302e-05
## 
## Std. Errors:
##   (Intercept)          MR1          MR2
## 2  0.06359502 0.000000e+00 1.099654e-21
## 3  0.07992653 2.597782e-21 1.770602e-21
## 
## Residual Deviance: 2835.448 
## AIC: 2847.448 
## 
## DE1 - Confusion Matrix:
##       y_pred
## y_data    1    2    3
##      1 1645    0    0
##      2  291    0    0
##      3  173    0    0

## 
## DE1 - Odds Ratios:
##   (Intercept)      MR1      MR2
## 2   0.1768998 1.000004 1.000026
## 3   0.1051672 1.000015 1.000017
## 
## DE1 - Accuracy:
## [1] 0.7799905
## 
## ========== EF1 ==========
## >>> VIF (Variance Inflation Factor):
##                                         baseline.value 
##                                                  4.233 
##                                          accelerations 
##                                                  2.054 
##                                         fetal_movement 
##                                                  1.183 
##                                   uterine_contractions 
##                                                  1.935 
##                                    light_decelerations 
##                                                  3.006 
##                        abnormal_short_term_variability 
##                                                  2.184 
##                   mean_value_of_short_term_variability 
##                                                  4.988 
## percentage_of_time_with_abnormal_long_term_variability 
##                                                  3.189 
##                    mean_value_of_long_term_variability 
##                                                  2.071 
##                                        histogram_width 
##                                                  9.528 
##                                          histogram_max 
##                                                  6.759 
##                                         histogram_mode 
##                                                  7.002 
##                                     histogram_variance 
##                                                  3.530 
##                                     histogram_tendency 
##                                                  2.925 
## 
## EF1 - Likelihood Ratio Test (Manual Calculation):
## LRT Statistic: 3088.537, df: 28, p-value: 0.000e+00
## 
## EF1 - Wald Test (Coefficients and p-values):
## Call:
## nnet::multinom(formula = y ~ ., data = data_mlr, trace = FALSE, 
##     maxit = 1000)
## 
## Coefficients:
##   (Intercept) baseline.value accelerations fetal_movement uterine_contractions
## 2  -0.2976572      0.1076334     -2.719055     0.22304836           -0.5192612
## 3  -1.8311926      1.5362856     -5.239806    -0.09119814           -1.2979018
##   light_decelerations abnormal_short_term_variability
## 2           -1.113091                       0.8353494
## 3           -2.138346                       2.6489640
##   mean_value_of_short_term_variability
## 2                           -1.3311877
## 3                            0.9814443
##   percentage_of_time_with_abnormal_long_term_variability
## 2                                              0.5779501
## 3                                              0.1791453
##   mean_value_of_long_term_variability histogram_width histogram_max
## 2                          0.02941503      -0.7429038      1.522455
## 3                         -1.34000521      -1.0186417      1.074816
##   histogram_mode histogram_variance histogram_tendency
## 2     -0.1309325           2.296594          0.2873952
## 3     -3.2242140           3.396115          0.1205096
## 
## Std. Errors:
##   (Intercept) baseline.value accelerations fetal_movement uterine_contractions
## 2   0.1755196      0.2353804     0.2574797      0.1064790            0.1364606
## 3   0.3274820      0.2944149     0.7099174      0.1322297            0.1833212
##   light_decelerations abnormal_short_term_variability
## 2           0.2523423                       0.1741852
## 3           0.2950798                       0.2252022
##   mean_value_of_short_term_variability
## 2                            0.3285242
## 3                            0.3285351
##   percentage_of_time_with_abnormal_long_term_variability
## 2                                              0.1380616
## 3                                              0.2154641
##   mean_value_of_long_term_variability histogram_width histogram_max
## 2                           0.1773035       0.2999940     0.2946158
## 3                           0.2304425       0.3863953     0.3769490
##   histogram_mode histogram_variance histogram_tendency
## 2      0.3563585          0.2720778          0.1803078
## 3      0.4273933          0.3078605          0.2156736
## 
## Residual Deviance: 1543.211 
## AIC: 1603.211 
## 
## EF1 - Confusion Matrix:
##       y_pred
## y_data   1   2   3
##      1 611  74  18
##      2  39 601  63
##      3   9  93 600

## 
## EF1 - Odds Ratios:
##   (Intercept) baseline.value accelerations fetal_movement uterine_contractions
## 2   0.7425559       1.113639   0.065937064      1.2498810            0.5949599
## 3   0.1602224       4.647296   0.005301285      0.9128368            0.2731042
##   light_decelerations abnormal_short_term_variability
## 2           0.3285419                         2.30562
## 3           0.1178496                        14.13938
##   mean_value_of_short_term_variability
## 2                            0.2641633
## 3                            2.6683072
##   percentage_of_time_with_abnormal_long_term_variability
## 2                                               1.782381
## 3                                               1.196195
##   mean_value_of_long_term_variability histogram_width histogram_max
## 2                           1.0298519       0.4757305      4.583463
## 3                           0.2618443       0.3610851      2.929453
##   histogram_mode histogram_variance histogram_tendency
## 2     0.87727702           9.940271           1.332951
## 3     0.03978704          29.847928           1.128072
## 
## EF1 - Accuracy:
## [1] 0.8595825
## 
## ========== FG1 ==========
## >>> VIF (Variance Inflation Factor):
## PC1 PC2 PC3 PC4 PC5 
##   1   1   1   1   1 
## 
## FG1 - Likelihood Ratio Test (Manual Calculation):
## LRT Statistic: 2513.253, df: 10, p-value: 0.000e+00
## 
## FG1 - Wald Test (Coefficients and p-values):
## Call:
## nnet::multinom(formula = y ~ ., data = data_mlr, trace = FALSE, 
##     maxit = 1000)
## 
## Coefficients:
##   (Intercept)        PC1       PC2      PC3         PC4        PC5
## 2  -0.0396679 -0.7903702 0.5405675 1.281506 -0.03577445 -0.9398919
## 3  -0.8478772 -0.1596825 2.5236663 1.881434  0.25415190 -1.4049281
## 
## Std. Errors:
##   (Intercept)        PC1        PC2        PC3        PC4        PC5
## 2  0.09978608 0.05074461 0.07356033 0.08803727 0.07019422 0.09143347
## 3  0.14135923 0.04418639 0.12317339 0.11631403 0.08519247 0.13834163
## 
## Residual Deviance: 2118.495 
## AIC: 2142.495 
## 
## FG1 - Confusion Matrix:
##       y_pred
## y_data   1   2   3
##      1 557  87  59
##      2  67 548  88
##      3  38 101 563

## 
## FG1 - Odds Ratios:
##   (Intercept)       PC1       PC2      PC3       PC4       PC5
## 2   0.9611086 0.4536768  1.716981 3.602060 0.9648579 0.3906700
## 3   0.4283232 0.8524144 12.474247 6.562912 1.2893676 0.2453847
## 
## FG1 - Accuracy:
## [1] 0.7912713
## 
## ========== GH1 ==========
## >>> VIF (Variance Inflation Factor):
##   MR1   MR2   MR3   MR5   MR4 
## 1.008 1.018 1.019 1.007 1.008 
## 
## GH1 - Likelihood Ratio Test (Manual Calculation):
## LRT Statistic: 2302.203, df: 10, p-value: 0.000e+00
## 
## GH1 - Wald Test (Coefficients and p-values):
## Call:
## nnet::multinom(formula = y ~ ., data = data_mlr, trace = FALSE, 
##     maxit = 1000)
## 
## Coefficients:
##   (Intercept)        MR1        MR2       MR3         MR5        MR4
## 2 -0.08495295 -0.7069997  1.3848634 -1.539444 -0.08338301 -1.2436374
## 3 -0.18678609  0.9869159 -0.2080233 -2.981898 -1.87820724 -0.9567872
## 
## Std. Errors:
##   (Intercept)       MR1        MR2       MR3       MR5       MR4
## 2   0.1080107 0.1082165 0.09592049 0.1008310 0.1103074 0.0952941
## 3   0.1073241 0.1051512 0.10271432 0.1475438 0.1261135 0.1028907
## 
## Residual Deviance: 2329.546 
## AIC: 2353.546 
## 
## GH1 - Confusion Matrix:
##       y_pred
## y_data   1   2   3
##      1 558  91  54
##      2  71 542  90
##      3  53 119 530

## 
## GH1 - Odds Ratios:
##   (Intercept)       MR1       MR2        MR3       MR5       MR4
## 2   0.9185555 0.4931215 3.9942803 0.21450029 0.9199987 0.2883335
## 3   0.8296212 2.6829473 0.8121881 0.05069652 0.1528639 0.3841250
## 
## GH1 - Accuracy:
## [1] 0.7732448
## 
## ========== HI1 ==========
## >>> VIF (Variance Inflation Factor):
##   MR1   MR2 
## 1.016 1.016 
## 
## HI1 - Likelihood Ratio Test (Manual Calculation):
## LRT Statistic: 0.000, df: 4, p-value: 1.000e+00
## 
## HI1 - Wald Test (Coefficients and p-values):

## Warning in sqrt(diag(vc)): NaNs produced

## Call:
## nnet::multinom(formula = y ~ ., data = data_mlr, trace = FALSE, 
##     maxit = 1000)
## 
## Coefficients:
##     (Intercept)           MR1           MR2
## 2 -1.684779e-08 -5.866299e-06 -5.866196e-06
## 3 -1.423518e-03 -1.414946e-05  2.756714e-06
## 
## Std. Errors:
##   (Intercept)          MR1 MR2
## 2  0.05333807 0.000000e+00 NaN
## 3  0.05335707 2.068111e-22 NaN
## 
## Residual Deviance: 4631.748 
## AIC: 4643.748 
## 
## HI1 - Confusion Matrix:
##       y_pred
## y_data   1   2   3
##      1 355 348   0
##      2 352 351   0
##      3 347 355   0

## 
## HI1 - Odds Ratios:
##   (Intercept)       MR1       MR2
## 2   1.0000000 0.9999941 0.9999941
## 3   0.9985775 0.9999859 1.0000028
## 
## HI1 - Accuracy:
## [1] 0.3349146

# Visualization: Model Accuracy Comparison
accuracy_data <- data.frame(
  Model = c(names(lda_accuracies), names(mlr_accuracies)),
  Accuracy = c(unlist(lda_accuracies), unlist(mlr_accuracies)),
  Method = c(rep("LDA", length(lda_accuracies)), rep("MLR", length(mlr_accuracies)))
)

# Create bar chart for accuracy comparison
p_accuracy <- ggplot(accuracy_data, aes(x = Model, y = Accuracy, fill = Method)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("LDA" = "#4e79a7", "MLR" = "#f28e2b")) +
  labs(title = "Model Accuracy Comparison (LDA vs MLR)", x = "Model", y = "Accuracy") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p_accuracy)

print(accuracy_data)

##     Model  Accuracy Method
## AA1   AA1 0.8643907    LDA
## BB1   BB1 0.8553817    LDA
## CC1   CC1 0.7894737    LDA
## EE1   EE1 0.8334915    LDA
## FF1   FF1 0.7922201    LDA
## GG1   GG1 0.7680266    LDA
## AB1   AB1 0.8942627    MLR
## BC1   BC1 0.8596491    MLR
## CD1   CD1 0.8022760    MLR
## DE1   DE1 0.7799905    MLR
## EF1   EF1 0.8595825    MLR
## FG1   FG1 0.7912713    MLR
## GH1   GH1 0.7732448    MLR
## HI1   HI1 0.3349146    MLR

Analisis Kesehatan Janin melalui CTG Dengan Metode Linear Diskriminan Analisis dan Multinomial Regresi Logistik

Kelompok 3 Nafila Hanum Al Hasaniy Sheira En Nadia Dafanov Dixie Einkinderen

Load Dataset

Korelasi Awal

Missing Value

Deteksi Outlier

Boxplot Sebelum Preprocessing

Winsorizing untuk Mengurangi Outlier

Visualisasi Boxplot Setelah Winsorizing

Pengecekan dan Penghapusan Kolom Konstan atau Nol Varians

Deteksi dan Penghapusan Baris Duplikat

Penghapusan Kolom yang Sangat Berkorelasi

SMOTE untuk Menyeimbangkan Data

Visualization: Class distribution before and after SMOTE

Pemisahan Dataset

Uji asumsi, pemodelan, uji signifikansi

Kelompok 3
Nafila Hanum Al Hasaniy
Sheira En Nadia
Dafanov Dixie Einkinderen