# ================================== 1. PREPROCESSING =====================================#
data <- read.csv("fetal_health.csv")

#--Cek missing value
missing_data <- sapply(data, function(x) sum(is.na(x)))
print("Missing Values Count:")
## [1] "Missing Values Count:"
print(missing_data)
##                                         baseline.value 
##                                                      0 
##                                          accelerations 
##                                                      0 
##                                         fetal_movement 
##                                                      0 
##                                   uterine_contractions 
##                                                      0 
##                                    light_decelerations 
##                                                      0 
##                                   severe_decelerations 
##                                                      0 
##                               prolongued_decelerations 
##                                                      0 
##                        abnormal_short_term_variability 
##                                                      0 
##                   mean_value_of_short_term_variability 
##                                                      0 
## percentage_of_time_with_abnormal_long_term_variability 
##                                                      0 
##                    mean_value_of_long_term_variability 
##                                                      0 
##                                        histogram_width 
##                                                      0 
##                                          histogram_min 
##                                                      0 
##                                          histogram_max 
##                                                      0 
##                              histogram_number_of_peaks 
##                                                      0 
##                             histogram_number_of_zeroes 
##                                                      0 
##                                         histogram_mode 
##                                                      0 
##                                         histogram_mean 
##                                                      0 
##                                       histogram_median 
##                                                      0 
##                                     histogram_variance 
##                                                      0 
##                                     histogram_tendency 
##                                                      0 
##                                           fetal_health 
##                                                      0
#--Cek dan hapus duplikat
duplicate_rows <- sum(duplicated(data))
cat("Jumlah duplikat dalam dataset: ", duplicate_rows, "\n")
## Jumlah duplikat dalam dataset:  13
data <- data[!duplicated(data), ]
cat("Dataset setelah menghapus duplikat: ", nrow(data), "baris\n")
## Dataset setelah menghapus duplikat:  2113 baris
#--Ubah target menjadi faktor
data$fetal_health <- as.factor(data$fetal_health)

#--Handling Outliers
num_vars <- names(data)[sapply(data, is.numeric)]

winsorize_iqr <- function(dataset) {
  for (col in names(dataset)) {
    if (is.numeric(dataset[[col]])) {
      Q1 <- quantile(dataset[[col]], 0.25, na.rm = TRUE)
      Q3 <- quantile(dataset[[col]], 0.75, na.rm = TRUE)
      IQR_value <- Q3 - Q1
      lower_bound <- Q1 - 1.5 * IQR_value
      upper_bound <- Q3 + 1.5 * IQR_value
      dataset[[col]][dataset[[col]] < lower_bound] <- lower_bound
      dataset[[col]][dataset[[col]] > upper_bound] <- upper_bound
    }
  }
  return(dataset)
}

#--Terapkan winsorization pada dataset
data <- winsorize_iqr(data)


#================================== 2. EDA ========================================#
cat("Statistika Deskriptif:\n")
## Statistika Deskriptif:
summary(data)
##  baseline.value  accelerations      fetal_movement     uterine_contractions
##  Min.   :106.0   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000    
##  1st Qu.:126.0   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.002000    
##  Median :133.0   Median :0.002000   Median :0.000000   Median :0.005000    
##  Mean   :133.3   Mean   :0.003177   Mean   :0.001747   Mean   :0.004387    
##  3rd Qu.:140.0   3rd Qu.:0.006000   3rd Qu.:0.003000   3rd Qu.:0.007000    
##  Max.   :160.0   Max.   :0.015000   Max.   :0.007500   Max.   :0.014500    
##  light_decelerations severe_decelerations prolongued_decelerations
##  Min.   :0.000000    Min.   :0            Min.   :0               
##  1st Qu.:0.000000    1st Qu.:0            1st Qu.:0               
##  Median :0.000000    Median :0            Median :0               
##  Mean   :0.001744    Mean   :0            Mean   :0               
##  3rd Qu.:0.003000    3rd Qu.:0            3rd Qu.:0               
##  Max.   :0.007500    Max.   :0            Max.   :0               
##  abnormal_short_term_variability mean_value_of_short_term_variability
##  Min.   :12.00                   Min.   :0.200                       
##  1st Qu.:32.00                   1st Qu.:0.700                       
##  Median :49.00                   Median :1.200                       
##  Mean   :46.99                   Mean   :1.302                       
##  3rd Qu.:61.00                   3rd Qu.:1.700                       
##  Max.   :87.00                   Max.   :3.200                       
##  percentage_of_time_with_abnormal_long_term_variability
##  Min.   : 0.000                                        
##  1st Qu.: 0.000                                        
##  Median : 0.000                                        
##  Mean   : 6.631                                        
##  3rd Qu.:11.000                                        
##  Max.   :27.500                                        
##  mean_value_of_long_term_variability histogram_width  histogram_min   
##  Min.   : 0.00                       Min.   :  3.00   Min.   : 50.00  
##  1st Qu.: 4.60                       1st Qu.: 37.00   1st Qu.: 67.00  
##  Median : 7.40                       Median : 68.00   Median : 93.00  
##  Mean   : 7.98                       Mean   : 70.54   Mean   : 93.56  
##  3rd Qu.:10.80                       3rd Qu.:100.00   3rd Qu.:120.00  
##  Max.   :20.10                       Max.   :180.00   Max.   :159.00  
##  histogram_max   histogram_number_of_peaks histogram_number_of_zeroes
##  Min.   :122.0   Min.   : 0.00             Min.   :0                 
##  1st Qu.:152.0   1st Qu.: 2.00             1st Qu.:0                 
##  Median :162.0   Median : 4.00             Median :0                 
##  Mean   :163.9   Mean   : 4.06             Mean   :0                 
##  3rd Qu.:174.0   3rd Qu.: 6.00             3rd Qu.:0                 
##  Max.   :207.0   Max.   :12.00             Max.   :0                 
##  histogram_mode  histogram_mean  histogram_median histogram_variance
##  Min.   :100.5   Min.   : 95.0   Min.   :100.5    Min.   : 0.00     
##  1st Qu.:129.0   1st Qu.:125.0   1st Qu.:129.0    1st Qu.: 2.00     
##  Median :139.0   Median :136.0   Median :139.0    Median : 7.00     
##  Mean   :137.9   Mean   :134.8   Mean   :138.2    Mean   :15.66     
##  3rd Qu.:148.0   3rd Qu.:145.0   3rd Qu.:148.0    3rd Qu.:24.00     
##  Max.   :176.5   Max.   :175.0   Max.   :176.5    Max.   :57.00     
##  histogram_tendency fetal_health
##  Min.   :-1.0000    1:1646      
##  1st Qu.: 0.0000    2: 292      
##  Median : 0.0000    3: 175      
##  Mean   : 0.3185                
##  3rd Qu.: 1.0000                
##  Max.   : 1.0000
#--Distribusi variabel target (fetal_health)
cat("\nDistribusi Target (fetal_health):\n")
## 
## Distribusi Target (fetal_health):
print(table(data$fetal_health))
## 
##    1    2    3 
## 1646  292  175
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
ggplot(data, aes(x = factor(fetal_health))) +
  geom_bar(fill = "steelblue") +
  labs(title = "Distribusi Kesehatan Janin (fetal_health)", x = "Kategori Kesehatan", y = "Jumlah") +
  theme_minimal()

#--korelasi
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
numeric_cols <- data[, sapply(data, is.numeric)]
corrplot(cor(numeric_cols), method = "color",
         tl.cex = 0.3, number.cex = 0.2,
         col = colorRampPalette(c("blue", "white", "red"))(200),
         type = "full", addCoef.col = "black")
## Warning in cor(numeric_cols): the standard deviation is zero

#--Boxplot semua variabel numerik terhadap fetal_health
num_vars <- names(data)[sapply(data, is.numeric)]
par(mfrow = c(3, 3))
for (col in num_vars) {
  boxplot(data[[col]] ~ data$fetal_health,
          main = paste("Boxplot:", col), xlab = "Fetal Health", ylab = col)
}

#--distribusi
par(mfrow = c(3, 3))

for (col in num_vars) {
  plot(density(data[[col]]), main = paste("Density:", col),
       xlab = col, col = "blue", lwd = 2)
}

#--visualisasi setelah penanganan outliers
num_vars <- names(data)[sapply(data, is.numeric)]
par(mfrow = c(3, 3))

for (col in num_vars) {
  boxplot(data[[col]],
          main = paste("Sesudah -", col),
          col = "lightblue", border = "black")
  
  
}

#=========================================SMOTE=========================================#
library(UBL)
## Warning: package 'UBL' was built under R version 4.4.3
## Loading required package: MBA
## Warning: package 'MBA' was built under R version 4.4.3
## Loading required package: gstat
## Warning: package 'gstat' was built under R version 4.4.3
## Loading required package: automap
## Warning: package 'automap' was built under R version 4.4.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.4.3
## Loading required package: randomForest
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
data_smote <- SmoteClassif(fetal_health ~ ., data, C.perc = "balance")
cat("Distribusi setelah SMOTE:\n")
## Distribusi setelah SMOTE:
print(table(data_smote$fetal_health))
## 
##   1   2   3 
## 704 703 704
# ================================== 3. FA (Factor Analysis) ===================================== #
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:UBL':
## 
##     phi
## The following object is masked from 'package:randomForest':
## 
##     outlier
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
#---Ambil hanya prediktor numerik (exclude target dan faktor)
numeric_predictors <- data_smote %>% dplyr::select_if(is.numeric)
non_constant_predictors <- numeric_predictors[, apply(numeric_predictors, 2, var) != 0]

#---Standardisasi data
scale_data <- scale(non_constant_predictors)

#---Matriks kovarians dan eigen decomposition (PCA manual)
varcov <- cov(scale_data)
pc <- eigen(varcov)

#---Nilai eigen dan vektor eigen
pc$values
##  [1] 7.77831168 2.95695970 1.45775175 1.31594611 1.13502340 0.81262226
##  [7] 0.60949539 0.39490916 0.37408270 0.28454114 0.23664489 0.21761567
## [13] 0.15451727 0.12196499 0.10008573 0.03604602 0.01316958 0.00031256
pc$vectors
##              [,1]         [,2]         [,3]        [,4]         [,5]
##  [1,]  0.22596373 -0.233324996 -0.311048521  0.31970566 -0.149061104
##  [2,] -0.06260451 -0.311014889  0.263805163  0.02798256  0.510634344
##  [3,]  0.02575570 -0.004754597 -0.421125905 -0.43484822  0.448747480
##  [4,] -0.18585900 -0.136855262  0.325287446  0.38135862 -0.180629805
##  [5,] -0.28686236  0.044141672 -0.108966909  0.16265379 -0.196159827
##  [6,]  0.12877783  0.332387467 -0.404101035  0.19360868 -0.071351764
##  [7,] -0.32026341 -0.118382329  0.013244516  0.06122653 -0.021671662
##  [8,]  0.26902069  0.164551963 -0.259848311  0.03140029 -0.130279932
##  [9,]  0.08693733 -0.276991534  0.177340522 -0.49143962 -0.191714465
## [10,] -0.30179383 -0.225550934 -0.221314366 -0.05381258 -0.072577737
## [11,]  0.30820216  0.100463747  0.127356763  0.22476008  0.208372598
## [12,] -0.15727427 -0.361085573 -0.295770176  0.28021086  0.207958705
## [13,] -0.24161413 -0.214573358 -0.310845371 -0.13710106 -0.089571815
## [14,]  0.26890667 -0.338648291 -0.057388588  0.11361131 -0.029857648
## [15,]  0.30414874 -0.288809548 -0.003474171  0.05433000  0.046482068
## [16,]  0.28479119 -0.329809309 -0.057910805  0.10517576  0.005468332
## [17,] -0.30601005 -0.055597088 -0.142124246  0.09254506 -0.032367771
## [18,]  0.14034035 -0.229088901 -0.003991148 -0.25705297 -0.530263123
##               [,6]        [,7]        [,8]        [,9]        [,10]
##  [1,] -0.076624633 -0.18822365  0.26528624  0.12264568 -0.236646198
##  [2,]  0.458750683  0.36767471 -0.05755306  0.10883451  0.052361777
##  [3,]  0.175391164 -0.60334841 -0.13680482  0.07580265  0.028273090
##  [4,]  0.035991650 -0.44129801 -0.63157235  0.14834627 -0.020874322
##  [5,]  0.129851488 -0.16755013  0.34786132 -0.13302860  0.708129968
##  [6,]  0.120235952  0.25012335 -0.24571383  0.63942283  0.208271213
##  [7,]  0.019085387 -0.15961592  0.08266064 -0.04890967 -0.010618411
##  [8,]  0.002920477  0.16050306 -0.31569141 -0.48351346 -0.146434847
##  [9,] -0.533883855  0.02359049 -0.04967930  0.31405728  0.146676071
## [10,] -0.023258460  0.17918399 -0.03862281  0.12245886 -0.070654690
## [11,] -0.136437277 -0.17210050  0.01608773 -0.14231399  0.096771338
## [12,] -0.304546891  0.11323115 -0.05568014  0.03185951 -0.002218788
## [13,] -0.043110084  0.19999681 -0.38609310 -0.37195472  0.177635800
## [14,]  0.081770631 -0.08380746  0.00445630 -0.05051365  0.095206453
## [15,] -0.005006804  0.04472167  0.05083687 -0.01635675  0.106596187
## [16,]  0.047527173 -0.02257062  0.02971627 -0.01445208  0.169176859
## [17,]  0.044859578 -0.05279117  0.24668634  0.05791972 -0.495954143
## [18,]  0.556911196 -0.03631295  0.01033712  0.05472988 -0.105794452
##              [,11]        [,12]       [,13]       [,14]        [,15]
##  [1,]  0.281925233 -0.049682223  0.08540667  0.56415875 -0.219911732
##  [2,] -0.141457430 -0.006166152  0.19498264  0.27869638 -0.199807593
##  [3,] -0.069006455  0.055759513 -0.02892898  0.03873426 -0.028382080
##  [4,] -0.134606389  0.006941658 -0.07994246  0.13652352 -0.009639133
##  [5,] -0.329562361  0.032740203 -0.02405200  0.15463106 -0.114026938
##  [6,] -0.036410002 -0.115156068  0.22001170 -0.10524935  0.019169577
##  [7,]  0.295840907  0.256651053  0.78026615 -0.20052843  0.054923897
##  [8,] -0.433898864  0.364023572  0.27418809  0.17534012 -0.075027913
##  [9,] -0.272560610 -0.037254932  0.25777650  0.12050595 -0.159294610
## [10,] -0.009072211  0.316683362 -0.20607876  0.07308912  0.174304337
## [11,] -0.019882353 -0.257864444  0.14555541 -0.32705382 -0.412395367
## [12,] -0.060920756  0.275775739 -0.21703364 -0.41860396 -0.326199234
## [13,]  0.271698645 -0.569777447  0.04630322  0.07509456 -0.066275608
## [14,] -0.093861618 -0.105584096  0.10848482 -0.23164377  0.625540622
## [15,] -0.107355017 -0.031625670 -0.05804049  0.04225747  0.069275255
## [16,] -0.040030129 -0.010483811  0.01033261 -0.04659426  0.133195801
## [17,] -0.561797797 -0.443773172  0.12086182 -0.10746490  0.033575901
## [18,]  0.045248347  0.059656569 -0.07265415 -0.31797733 -0.370416197
##             [,16]        [,17]         [,18]
##  [1,]  0.18614985 -0.007780756  2.247649e-03
##  [2,]  0.16872633  0.023685871  2.325665e-04
##  [3,] -0.01065034 -0.017890784  3.240639e-04
##  [4,] -0.04127655 -0.022240225 -6.850845e-04
##  [5,]  0.07543501 -0.031632745 -6.464160e-04
##  [6,] -0.04475282 -0.031466897 -2.335489e-04
##  [7,] -0.18074512 -0.075672448  3.706903e-03
##  [8,]  0.04223795  0.017106103 -8.827039e-04
##  [9,]  0.12113034  0.033215608 -6.795961e-05
## [10,] -0.02673692  0.020659484  7.530413e-01
## [11,]  0.10111069  0.010072272  5.755148e-01
## [12,]  0.12520136 -0.033617770 -3.172362e-01
## [13,] -0.01723296 -0.019133925  1.069100e-03
## [14,]  0.53182932 -0.104835095  6.964313e-03
## [15,] -0.56999674 -0.674450220  2.145457e-02
## [16,] -0.47109082  0.720418343 -2.308492e-02
## [17,] -0.13196223  0.034905453  3.027166e-03
## [18,]  0.06522213 -0.036492715  1.665931e-05
#---Scree Plot manual
plot(pc$values, type="b", main="Scree Plot", xlab="Number of Factors", ylab="Eigenvalues")

#---Proporsi kumulatif
cumprop <- cumsum(pc$values) / sum(pc$values)
cumprop
##  [1] 0.4321284 0.5964040 0.6773902 0.7504983 0.8135551 0.8587008 0.8925617
##  [8] 0.9145011 0.9352835 0.9510913 0.9642382 0.9763280 0.9849123 0.9916881
## [15] 0.9972484 0.9992510 0.9999826 1.0000000
#---Jumlah faktor berdasarkan threshold kumulatif >= 0.8
n_factors <- which(cumprop >= 0.80)[1]
cat("Jumlah faktor berdasarkan threshold kumulatif >= 0.80:", n_factors, "\n")
## Jumlah faktor berdasarkan threshold kumulatif >= 0.80: 5
#---Menghitung loading matrix manual (tanpa rotasi)
L <- matrix(nrow = nrow(pc$vectors), ncol = n_factors)
for (i in 1:n_factors) {
  L[, i] <- sqrt(pc$values[i]) * pc$vectors[, i]
}
colnames(L) <- paste0("F", 1:n_factors)
rownames(L) <- colnames(scale_data)
print(L)
##                                                                 F1           F2
## baseline.value                                          0.63020437 -0.401221290
## accelerations                                          -0.17460163 -0.534815375
## fetal_movement                                          0.07183169 -0.008175916
## uterine_contractions                                   -0.51835378 -0.235333743
## light_decelerations                                    -0.80004836  0.075905192
## abnormal_short_term_variability                         0.35915653  0.571567259
## mean_value_of_short_term_variability                   -0.89320264 -0.203568034
## percentage_of_time_with_abnormal_long_term_variability  0.75028860  0.282960471
## mean_value_of_long_term_variability                     0.24246495 -0.476309451
## histogram_width                                        -0.84169167 -0.387853158
## histogram_min                                           0.85956427  0.172755576
## histogram_max                                          -0.43863204 -0.620915985
## histogram_number_of_peaks                              -0.67385273 -0.368976325
## histogram_mode                                          0.74997062 -0.582333254
## histogram_mean                                          0.84825943 -0.496631486
## histogram_median                                        0.79427194 -0.567133907
## histogram_variance                                     -0.85345056 -0.095603710
## histogram_tendency                                      0.39140397 -0.393936980
##                                                                  F3          F4
## baseline.value                                         -0.375551868  0.36674937
## accelerations                                           0.318511470  0.03210011
## fetal_movement                                         -0.508456428 -0.49883481
## uterine_contractions                                    0.392743574  0.43747438
## light_decelerations                                    -0.131563803  0.18658780
## abnormal_short_term_variability                        -0.487901044  0.22209761
## mean_value_of_short_term_variability                    0.015991083  0.07023583
## percentage_of_time_with_abnormal_long_term_variability -0.313734070  0.03602075
## mean_value_of_long_term_variability                     0.214116319 -0.56375345
## histogram_width                                        -0.267209191 -0.06173093
## histogram_min                                           0.153767233  0.25783284
## histogram_max                                          -0.357105192  0.32144303
## histogram_number_of_peaks                              -0.375306589 -0.15727506
## histogram_mode                                         -0.069289484  0.13032886
## histogram_mean                                         -0.004194623  0.06232449
## histogram_median                                       -0.069919995  0.12065205
## histogram_variance                                     -0.171597105  0.10616279
## histogram_tendency                                     -0.004818807 -0.29487753
##                                                                  F5
## baseline.value                                         -0.158805939
## accelerations                                           0.544016947
## fetal_movement                                          0.478084243
## uterine_contractions                                   -0.192438438
## light_decelerations                                    -0.208983731
## abnormal_short_term_variability                        -0.076016370
## mean_value_of_short_term_variability                   -0.023088442
## percentage_of_time_with_abnormal_long_term_variability -0.138796953
## mean_value_of_long_term_variability                    -0.204247754
## histogram_width                                        -0.077322490
## histogram_min                                           0.221994908
## histogram_max                                           0.221553958
## histogram_number_of_peaks                              -0.095427552
## histogram_mode                                         -0.031809585
## histogram_mean                                          0.049520823
## histogram_median                                        0.005825822
## histogram_variance                                     -0.034483806
## histogram_tendency                                     -0.564928953
#---FA tanpa rotasi menggunakan `psych::fa`
fa_result <- fa(r = scale_data, covar = TRUE, nfactors = n_factors, rotate = "varimax", scores = "regression")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
#---Ambil factor loadings
load_no_rotation <- fa_result$loadings
print(load_no_rotation)
## 
## Loadings:
##                                                        MR1    MR2    MR3   
## baseline.value                                                 0.842 -0.233
## accelerations                                           0.135  0.104  0.546
## fetal_movement                                                             
## uterine_contractions                                    0.310 -0.106  0.254
## light_decelerations                                     0.633 -0.397 -0.101
## abnormal_short_term_variability                        -0.223        -0.747
## mean_value_of_short_term_variability                    0.714 -0.371  0.272
## percentage_of_time_with_abnormal_long_term_variability -0.467  0.360 -0.535
## mean_value_of_long_term_variability                            0.220  0.383
## histogram_width                                         0.943 -0.188  0.231
## histogram_min                                          -0.839  0.439 -0.139
## histogram_max                                           0.710  0.354  0.304
## histogram_number_of_peaks                               0.785 -0.123  0.162
## histogram_mode                                         -0.268  0.885  0.131
## histogram_mean                                         -0.427  0.849  0.174
## histogram_median                                       -0.311  0.912  0.139
## histogram_variance                                      0.728 -0.353       
## histogram_tendency                                             0.395       
##                                                        MR4    MR5   
## baseline.value                                                      
## accelerations                                                       
## fetal_movement                                                -0.424
## uterine_contractions                                           0.693
## light_decelerations                                    -0.117  0.348
## abnormal_short_term_variability                        -0.138 -0.173
## mean_value_of_short_term_variability                   -0.140  0.309
## percentage_of_time_with_abnormal_long_term_variability  0.106 -0.281
## mean_value_of_long_term_variability                     0.362 -0.211
## histogram_width                                                     
## histogram_min                                          -0.223       
## histogram_max                                          -0.475       
## histogram_number_of_peaks                                     -0.102
## histogram_mode                                          0.212       
## histogram_mean                                          0.177 -0.176
## histogram_median                                        0.185 -0.108
## histogram_variance                                     -0.183  0.207
## histogram_tendency                                      0.726       
## 
##                  MR1   MR2   MR3   MR4   MR5
## SS loadings    4.902 4.196 1.757 1.158 1.132
## Proportion Var 0.272 0.233 0.098 0.064 0.063
## Cumulative Var 0.272 0.505 0.603 0.667 0.730
#---Visualisasi loading faktor tanpa rotasi
plot(load_no_rotation[, c(1,2)], type = "n", main = "Plot Faktor Tanpa Rotasi")
text(load_no_rotation[, c(1,2)], labels = rownames(load_no_rotation), cex = 0.7)

#---Diagram faktor (tanpa rotasi)
fa.diagram(fa_result)

#---Ambil skor faktornya
fa_scores <- as.data.frame(fa_result$scores)
colnames(fa_scores) <- paste0("F", 1:n_factors)

#---Gabungkan kembali dengan target
data_fa <- cbind(fa_scores, fetal_health = data_smote$fetal_health)

#---cek kelas
print(table(data_fa$fetal_health))
## 
##   1   2   3 
## 704 703 704
#================================= 5. Klasifikasi LDA =======================================#
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(MVN)
## Warning: package 'MVN' was built under R version 4.4.3
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## ---
## biotools version 4.3
# Uji Normalitas Multivariat per kelas (Mardia)
cat("\n=== Uji Normalitas Multivariat (Mardia) ===\n")
## 
## === Uji Normalitas Multivariat (Mardia) ===
for (kelas in unique(data_fa$fetal_health)) {
  cat("\nKelas:", kelas, "\n")
  hasil_mvn <- mvn(data_fa[data_fa$fetal_health == kelas, -which(names(data_fa) == "fetal_health")],
                   mvnTest = "mardia", multivariatePlot = "none")
  print(hasil_mvn$multivariateNormality)
}
## 
## Kelas: 1 
##              Test        Statistic p value Result
## 1 Mardia Skewness 2406.82382354637       0     NO
## 2 Mardia Kurtosis 65.0846730028198       0     NO
## 3             MVN             <NA>    <NA>     NO
## 
## Kelas: 2 
##              Test        Statistic               p value Result
## 1 Mardia Skewness 1617.92375783486 5.36315175479715e-318     NO
## 2 Mardia Kurtosis 23.7696767987627                     0     NO
## 3             MVN             <NA>                  <NA>     NO
## 
## Kelas: 3 
##              Test        Statistic               p value Result
## 1 Mardia Skewness 1205.18818388139 1.76403400569983e-230     NO
## 2 Mardia Kurtosis 15.4072950768845                     0     NO
## 3             MVN             <NA>                  <NA>     NO
# Uji Homogenitas Varians-Kovarians (Box's M Test)
cat("\n=== Uji Homogenitas Varians-Kovarians (Box's M Test) ===\n")
## 
## === Uji Homogenitas Varians-Kovarians (Box's M Test) ===
boxm_result <- boxM(data_fa[, -which(names(data_fa) == "fetal_health")], data_fa$fetal_health)
print(boxm_result)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  data_fa[, -which(names(data_fa) == "fetal_health")]
## Chi-Sq (approx.) = 1766.2, df = 30, p-value < 2.2e-16
#---uji signifikansi pakai wilks lambda
manova_lda <- manova(as.matrix(data_fa[, -which(names(data_fa) == "fetal_health")]) ~ data_fa$fetal_health)
summary_manova <- summary(manova_lda, test = "Wilks")
cat("\n=== Uji Signifikansi Model LDA (Wilks' Lambda) ===\n")
## 
## === Uji Signifikansi Model LDA (Wilks' Lambda) ===
print(summary_manova)
##                        Df   Wilks approx F num Df den Df    Pr(>F)    
## data_fa$fetal_health    2 0.30352      343     10   4208 < 2.2e-16 ***
## Residuals            2108                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pastikan labelnya bertipe faktor
data_fa$fetal_health <- as.factor(data_fa$fetal_health)

#lda model
lda_model <- lda(fetal_health ~ ., data = data_fa)


#---uji signifikansi pakai wilks lambda
manova_lda <- manova(as.matrix(data_fa[, -which(names(data_fa) == "fetal_health")]) ~ data_fa$fetal_health)
summary_manova <- summary(manova_lda, test = "Wilks")
cat("\n=== Uji Signifikansi Model LDA (Wilks' Lambda) ===\n")
## 
## === Uji Signifikansi Model LDA (Wilks' Lambda) ===
print(summary_manova)
##                        Df   Wilks approx F num Df den Df    Pr(>F)    
## data_fa$fetal_health    2 0.30352      343     10   4208 < 2.2e-16 ***
## Residuals            2108                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#---Uji signifikansi Variabel (Koefisien Diskriminan)
print(round(lda_model$scaling, 4))
##        LD1     LD2
## F1  0.3258  0.2938
## F2 -0.6064 -0.8998
## F3 -1.1878  0.6421
## F4 -0.5139 -0.1066
## F5 -0.1161  0.5863
#prediksi pada data yang sama
lda_pred <- predict(lda_model, newdata = data_fa)

#confusion
confusion_matrix <- table(Predicted = lda_pred$class, Actual = data_fa$fetal_health)
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(confusion_matrix)
##          Actual
## Predicted   1   2   3
##         1 571 109  26
##         2 100 556 195
##         3  33  38 483
#heatmap
conf_mat_table <- table(Predicted = lda_pred$class, Actual = data_fa$fetal_health)
conf_mat_df <- as.data.frame(conf_mat_table)

ggplot(conf_mat_df, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile(color = "white") +
  geom_text(aes(label = Freq), size = 5, color = "black") +
  scale_fill_gradient(low = "lightblue", high = "red") +
  labs(title = "Heatmap Confusion Matrix (LDA - In Sample)",
       x = "Actual Class", y = "Predicted Class") +
  theme_minimal()

accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix) * 100
cat("Akurasi:", round(accuracy, 2), "%\n")
## Akurasi: 76.27 %
#===================================== 6. klasifikasi mlr =================================#
library(nnet)
## Warning: package 'nnet' was built under R version 4.4.3
library(caret)
library(ggplot2)
library(reshape2)


#=============================uji asumsi (VIF) ========================#
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:dplyr':
## 
##     recode
library(dplyr)

#---Buat model VIF hanya dari prediktor, tanpa pake target 
predictors_only <- dplyr::select(data_fa, -fetal_health)

model_vif <- lm(rep(1, nrow(predictors_only)) ~ ., data = predictors_only)
vif_values <- car::vif(model_vif)

print("VIF antar prediktor:")
## [1] "VIF antar prediktor:"
print(vif_values)
##       F1       F2       F3       F4       F5 
## 1.003850 1.013189 1.010842 1.002323 1.006092
#==========masuk klasifikasi=========#
set.seed(123)
# Pastikan label faktor
data_fa$fetal_health <- as.factor(data_fa$fetal_health)

# Latih model multinomial logistic 
model_multi <- multinom(fetal_health ~ ., data = data_fa)
## # weights:  21 (12 variable)
## initial  value 2319.170541 
## iter  10 value 1408.166576
## iter  20 value 1134.866731
## final  value 1134.721114 
## converged
# --- Uji Serentak ---
model_null <- multinom(fetal_health ~ 1, data = data_fa)
## # weights:  6 (2 variable)
## initial  value 2319.170541 
## final  value 2319.170068 
## converged
lrt_stat <- 2 * (logLik(model_multi) - logLik(model_null))
df_diff <- attr(logLik(model_multi), "df") - attr(logLik(model_null), "df")
p_value <- pchisq(lrt_stat, df = df_diff, lower.tail = FALSE)

cat("Uji Serentak (Likelihood Ratio Test):\n")
## Uji Serentak (Likelihood Ratio Test):
cat("Statistik LRT =", round(lrt_stat, 3), "\n")
## Statistik LRT = 2368.898
cat("Derajat kebebasan =", df_diff, "\n")
## Derajat kebebasan = 10
cat("P-value =", p_value, "\n")
## P-value = 0
# --- Uji Parsial ---
summary_model <- summary(model_multi)

coefs <- summary_model$coefficients
std_err <- summary_model$standard.errors

z_values <- coefs / std_err
p_values <- 2 * (1 - pnorm(abs(z_values)))

cat("\nUji Parsial (Wald Test) untuk tiap koefisien:\n")
## 
## Uji Parsial (Wald Test) untuk tiap koefisien:
print(round(p_values, 4))
##   (Intercept) F1 F2 F3     F4 F5
## 2      0.0463  0  0  0 0.8311  0
## 3      0.0012  0  0  0 0.0000  0
# Prediksi pada data yang sama (in-sample)
prediksi <- predict(model_multi, newdata = data_fa)

# Confusion Matrix
confusion_matrix <- table(Predicted = prediksi, Actual = data_fa$fetal_health)
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(confusion_matrix)
##          Actual
## Predicted   1   2   3
##         1 572  76  38
##         2  94 529 124
##         3  38  98 542
# Visualisasi confusion matrix sebagai heatmap
cm_df <- as.data.frame(confusion_matrix)
colnames(cm_df) <- c("Predicted", "Actual", "Freq")

ggplot(data = cm_df, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile(color = "white") +
  geom_text(aes(label = Freq), vjust = 0.5, fontface = "bold", color = "black") +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(title = "Confusion Matrix (Multinomial Logistic Regression - In Sample)",
       x = "Actual Label", y = "Predicted Label") +
  theme_minimal()

# Hitung akurasi
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
cat("Akurasi:", round(accuracy * 100, 2), "%\n")
## Akurasi: 77.83 %
#---Interpretasi menggunakan odds ratio
odds_ratios <- exp(coefs)
cat("\n=== Odds Ratio ===\n")
## 
## === Odds Ratio ===
print(round(odds_ratios, 3))
##   (Intercept)    F1    F2    F3    F4    F5
## 2       0.798 0.510 3.705 0.225 0.979 0.246
## 3       0.699 1.672 0.276 0.038 0.332 0.612