# ================================== 1. PREPROCESSING =====================================#
data <- read.csv("fetal_health.csv")
#---Cek missing value
missing_data <- sapply(data, function(x) sum(is.na(x)))
print("Missing Values Count:")
## [1] "Missing Values Count:"
print(missing_data)
## baseline.value
## 0
## accelerations
## 0
## fetal_movement
## 0
## uterine_contractions
## 0
## light_decelerations
## 0
## severe_decelerations
## 0
## prolongued_decelerations
## 0
## abnormal_short_term_variability
## 0
## mean_value_of_short_term_variability
## 0
## percentage_of_time_with_abnormal_long_term_variability
## 0
## mean_value_of_long_term_variability
## 0
## histogram_width
## 0
## histogram_min
## 0
## histogram_max
## 0
## histogram_number_of_peaks
## 0
## histogram_number_of_zeroes
## 0
## histogram_mode
## 0
## histogram_mean
## 0
## histogram_median
## 0
## histogram_variance
## 0
## histogram_tendency
## 0
## fetal_health
## 0
#---Cek dan hapus duplikat
duplicate_rows <- sum(duplicated(data))
cat("Jumlah duplikat dalam dataset: ", duplicate_rows, "\n")
## Jumlah duplikat dalam dataset: 13
data <- data[!duplicated(data), ]
cat("Dataset setelah menghapus duplikat: ", nrow(data), "baris\n")
## Dataset setelah menghapus duplikat: 2113 baris
#---Ubah target menjadi faktor
data$fetal_health <- as.factor(data$fetal_health)
#---Handling Outliers
num_vars <- names(data)[sapply(data, is.numeric)]
winsorize_iqr <- function(dataset) {
for (col in names(dataset)) {
if (is.numeric(dataset[[col]])) {
Q1 <- quantile(dataset[[col]], 0.25, na.rm = TRUE)
Q3 <- quantile(dataset[[col]], 0.75, na.rm = TRUE)
IQR_value <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value
dataset[[col]][dataset[[col]] < lower_bound] <- lower_bound
dataset[[col]][dataset[[col]] > upper_bound] <- upper_bound
}
}
return(dataset)
}
#---Terapkan winsorization pada dataset
data <- winsorize_iqr(data)
#================================== 2. EDA ========================================#
cat("Statistika Deskriptif:\n")
## Statistika Deskriptif:
summary(data)
## baseline.value accelerations fetal_movement uterine_contractions
## Min. :106.0 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:126.0 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.002000
## Median :133.0 Median :0.002000 Median :0.000000 Median :0.005000
## Mean :133.3 Mean :0.003177 Mean :0.001747 Mean :0.004387
## 3rd Qu.:140.0 3rd Qu.:0.006000 3rd Qu.:0.003000 3rd Qu.:0.007000
## Max. :160.0 Max. :0.015000 Max. :0.007500 Max. :0.014500
## light_decelerations severe_decelerations prolongued_decelerations
## Min. :0.000000 Min. :0 Min. :0
## 1st Qu.:0.000000 1st Qu.:0 1st Qu.:0
## Median :0.000000 Median :0 Median :0
## Mean :0.001744 Mean :0 Mean :0
## 3rd Qu.:0.003000 3rd Qu.:0 3rd Qu.:0
## Max. :0.007500 Max. :0 Max. :0
## abnormal_short_term_variability mean_value_of_short_term_variability
## Min. :12.00 Min. :0.200
## 1st Qu.:32.00 1st Qu.:0.700
## Median :49.00 Median :1.200
## Mean :46.99 Mean :1.302
## 3rd Qu.:61.00 3rd Qu.:1.700
## Max. :87.00 Max. :3.200
## percentage_of_time_with_abnormal_long_term_variability
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 6.631
## 3rd Qu.:11.000
## Max. :27.500
## mean_value_of_long_term_variability histogram_width histogram_min
## Min. : 0.00 Min. : 3.00 Min. : 50.00
## 1st Qu.: 4.60 1st Qu.: 37.00 1st Qu.: 67.00
## Median : 7.40 Median : 68.00 Median : 93.00
## Mean : 7.98 Mean : 70.54 Mean : 93.56
## 3rd Qu.:10.80 3rd Qu.:100.00 3rd Qu.:120.00
## Max. :20.10 Max. :180.00 Max. :159.00
## histogram_max histogram_number_of_peaks histogram_number_of_zeroes
## Min. :122.0 Min. : 0.00 Min. :0
## 1st Qu.:152.0 1st Qu.: 2.00 1st Qu.:0
## Median :162.0 Median : 4.00 Median :0
## Mean :163.9 Mean : 4.06 Mean :0
## 3rd Qu.:174.0 3rd Qu.: 6.00 3rd Qu.:0
## Max. :207.0 Max. :12.00 Max. :0
## histogram_mode histogram_mean histogram_median histogram_variance
## Min. :100.5 Min. : 95.0 Min. :100.5 Min. : 0.00
## 1st Qu.:129.0 1st Qu.:125.0 1st Qu.:129.0 1st Qu.: 2.00
## Median :139.0 Median :136.0 Median :139.0 Median : 7.00
## Mean :137.9 Mean :134.8 Mean :138.2 Mean :15.66
## 3rd Qu.:148.0 3rd Qu.:145.0 3rd Qu.:148.0 3rd Qu.:24.00
## Max. :176.5 Max. :175.0 Max. :176.5 Max. :57.00
## histogram_tendency fetal_health
## Min. :-1.0000 1:1646
## 1st Qu.: 0.0000 2: 292
## Median : 0.0000 3: 175
## Mean : 0.3185
## 3rd Qu.: 1.0000
## Max. : 1.0000
#---Distribusi variabel target (fetal_health)
cat("\nDistribusi Target (fetal_health):\n")
##
## Distribusi Target (fetal_health):
print(table(data$fetal_health))
##
## 1 2 3
## 1646 292 175
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
ggplot(data, aes(x = factor(fetal_health))) +
geom_bar(fill = "steelblue") +
labs(title = "Distribusi Kesehatan Janin (fetal_health)", x = "Kategori Kesehatan", y = "Jumlah") +
theme_minimal()

#---korelasi
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
numeric_cols <- data[, sapply(data, is.numeric)]
corrplot(cor(numeric_cols), method = "color",
tl.cex = 0.3, number.cex = 0.2,
col = colorRampPalette(c("blue", "white", "red"))(200),
type = "full", addCoef.col = "black")
## Warning in cor(numeric_cols): the standard deviation is zero

#---Boxplot semua variabel numerik terhadap fetal_health
num_vars <- names(data)[sapply(data, is.numeric)]
par(mfrow = c(3, 3))
for (col in num_vars) {
boxplot(data[[col]] ~ data$fetal_health,
main = paste("Boxplot:", col), xlab = "Fetal Health", ylab = col)
}


#--distribusi
par(mfrow = c(3, 3))

for (col in num_vars) {
plot(density(data[[col]]), main = paste("Density:", col),
xlab = col, col = "blue", lwd = 2)
}


#---visualisasi setelah penanganan outliers
num_vars <- names(data)[sapply(data, is.numeric)]
par(mfrow = c(3, 3))

for (col in num_vars) {
boxplot(data[[col]],
main = paste("Sesudah -", col),
col = "lightblue", border = "black")
}


# ================================== 3. FA (Factor Analysis) ===================================== #
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
#---Ambil hanya prediktor numerik (exclude target dan faktor)
numeric_predictors <- data %>% dplyr::select_if(is.numeric)
non_constant_predictors <- numeric_predictors[, apply(numeric_predictors, 2, var) != 0]
#---Standardisasi data
scale_data <- scale(non_constant_predictors)
#---Matriks kovarians dan eigen decomposition (PCA manual)
varcov <- cov(scale_data)
pc <- eigen(varcov)
#---Nilai eigen dan vektor eigen
pc$values
## [1] 5.939993932 3.557440003 1.704753117 1.457227786 1.324010955 0.936299532
## [7] 0.642214015 0.569947642 0.479736326 0.343847479 0.284374283 0.238523068
## [13] 0.204055256 0.132892799 0.111883040 0.050850244 0.020599614 0.001350909
pc$vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.234431416 0.315676136 -0.20201148 -0.115138988 0.1991823629
## [2,] -0.070824259 0.272717161 0.28700292 -0.155436045 -0.4352342682
## [3,] -0.040203863 0.074308236 -0.30855477 0.332281463 -0.4809264528
## [4,] -0.133631531 0.002588429 0.28165534 -0.447501040 0.3185520525
## [5,] -0.289490223 0.025848559 -0.23887107 -0.199333019 0.3099524669
## [6,] 0.164511467 -0.074296498 -0.53788417 -0.123192736 -0.0422251407
## [7,] -0.341174683 0.102206106 0.07625616 -0.044899480 0.0131125960
## [8,] 0.253530603 -0.094052534 -0.35143759 0.020656490 0.1302886700
## [9,] 0.006577825 0.040958471 0.35119314 0.561052485 0.1653114830
## [10,] -0.326709318 0.260749930 -0.11357313 0.109283693 0.0002954748
## [11,] 0.346493749 -0.112573322 0.10867903 -0.256824452 -0.1247929485
## [12,] -0.135554623 0.394492714 -0.06021548 -0.181069184 -0.2058000159
## [13,] -0.262221752 0.224234661 -0.17860874 0.142520224 -0.0031618569
## [14,] 0.239382487 0.398902853 0.02336186 -0.046540512 0.0782915146
## [15,] 0.296389066 0.344171483 0.11332132 0.003603282 -0.0198242978
## [16,] 0.261550799 0.396796488 0.03182560 -0.042918369 0.0355921834
## [17,] -0.314269787 0.156878531 -0.16078401 -0.154624485 0.0622390556
## [18,] 0.052012640 0.216986283 -0.05250994 0.347638234 0.4756083660
## [,6] [,7] [,8] [,9] [,10]
## [1,] -0.184512237 0.21346024 0.10384131 -0.1823259868 0.146739862
## [2,] 0.422557805 -0.28828834 -0.02014202 0.0173361320 -0.153516571
## [3,] 0.135974612 0.62755349 -0.28741949 0.1987439864 -0.052592552
## [4,] -0.012583172 0.19404714 -0.69612278 0.2333600872 -0.091078294
## [5,] 0.056203434 0.26761677 0.26944058 -0.0785377391 0.113397957
## [6,] 0.020641519 -0.21342686 -0.45139706 -0.4998063612 -0.094856541
## [7,] -0.035486401 0.25203085 0.14649780 0.0559898916 0.096097935
## [8,] -0.165379778 -0.22772769 0.07203104 0.6855710159 -0.346729120
## [9,] -0.457781193 0.05543522 -0.17694017 -0.1778053335 -0.232080793
## [10,] -0.103162212 -0.19722616 -0.05774138 -0.0617307381 -0.150482236
## [11,] -0.116740095 0.19551129 0.05594176 0.0432026399 0.152885263
## [12,] -0.409608346 -0.09430314 -0.02027137 -0.0546977529 -0.081387464
## [13,] -0.129354696 -0.30427392 -0.20921095 0.3106494666 0.627628635
## [14,] 0.045195060 0.12144374 0.02863137 0.0738071301 -0.007002607
## [15,] -0.011773862 -0.01517565 0.01612687 0.0333113458 -0.029803115
## [16,] 0.018993248 0.05518327 0.01608572 0.0035119445 0.019281301
## [17,] 0.004905361 0.08088933 0.16670468 0.0006084543 -0.538659582
## [18,] 0.561823888 -0.03254123 -0.08675391 -0.0051829996 -0.025317734
## [,11] [,12] [,13] [,14] [,15]
## [1,] -0.180363160 -0.023600013 0.2542384320 0.68202755 -0.0828008956
## [2,] -0.027222732 0.054733932 -0.3487539142 0.37746708 -0.1750950498
## [3,] 0.057835517 -0.105690030 -0.0001919315 0.04366578 -0.0455384282
## [4,] -0.009197949 -0.090906464 0.0256227293 0.07723994 0.0197313618
## [5,] 0.268715703 -0.267365890 -0.6263906917 0.02275780 -0.1322662648
## [6,] -0.118214248 0.248508443 -0.2564736833 -0.07773428 0.0366081184
## [7,] -0.708046998 0.449565804 -0.1710234642 -0.10857260 0.0591105485
## [8,] -0.204274792 -0.007602557 -0.2254714613 0.09756041 -0.0811208709
## [9,] 0.106800352 0.165492570 -0.3462697064 0.13804217 -0.1179314599
## [10,] -0.142953789 -0.341984966 0.0847897897 -0.03775593 0.1371303483
## [11,] 0.130000653 0.292146323 -0.0468321569 -0.19306649 -0.4767971297
## [12,] -0.088276694 -0.241501779 0.1128732875 -0.35625009 -0.4638679451
## [13,] 0.282368933 0.310969945 -0.0090699899 0.08997137 -0.0174873128
## [14,] 0.113266835 0.095894782 -0.1101830578 -0.33427267 0.4838202151
## [15,] 0.084137422 -0.035838338 -0.0865875355 0.01189619 0.0679971757
## [16,] 0.001445312 0.023779671 -0.1253595523 -0.11117907 0.1324633991
## [17,] 0.409015896 0.494719583 0.2713377773 0.06075635 -0.0005140677
## [18,] -0.102884356 0.033093202 0.1541482565 -0.18952860 -0.4464680441
## [,16] [,17] [,18]
## [1,] 1.890667e-01 -0.0302745362 -0.0088031031
## [2,] 2.003681e-01 0.0073362787 -0.0018662891
## [3,] -1.609119e-02 -0.0023420877 -0.0026175892
## [4,] -2.368642e-02 -0.0002429731 0.0005551159
## [5,] -2.722990e-02 -0.0380646660 -0.0001182228
## [6,] -3.300720e-02 -0.0510703388 0.0025338796
## [7,] -1.120473e-01 -0.0771994378 -0.0074377618
## [8,] 5.030988e-02 0.0091938249 0.0009956713
## [9,] 1.053868e-01 0.0254230637 0.0010024972
## [10,] -1.344246e-06 0.0368812724 -0.7449356274
## [11,] 9.397018e-02 0.0118552108 -0.5618534911
## [12,] 1.038270e-01 -0.0526394973 0.3550228804
## [13,] -1.749595e-02 -0.0015716126 -0.0020459091
## [14,] 5.704821e-01 -0.2148775540 -0.0039844973
## [15,] -6.595388e-01 -0.5659045067 -0.0442671001
## [16,] -3.214611e-01 0.7844143632 0.0337668745
## [17,] -9.216216e-02 0.0235826920 -0.0083858401
## [18,] 5.544875e-02 -0.0414717440 0.0028755687
#---Scree Plot manual
plot(pc$values, type="b", main="Scree Plot", xlab="Number of Factors", ylab="Eigenvalues")
#---Proporsi kumulatif
cumprop <- cumsum(pc$values) / sum(pc$values)
cumprop
## [1] 0.3299997 0.5276352 0.6223437 0.7033008 0.7768570 0.8288736 0.8645522
## [8] 0.8962159 0.9228680 0.9419706 0.9577692 0.9710205 0.9823569 0.9897398
## [15] 0.9959555 0.9987805 0.9999249 1.0000000
#---Jumlah faktor berdasarkan threshold kumulatif >= 0.8
n_factors <- which(cumprop >= 0.80)[1]
cat("Jumlah faktor berdasarkan threshold kumulatif >= 0.80:", n_factors, "\n")
## Jumlah faktor berdasarkan threshold kumulatif >= 0.80: 6
#---Menghitung loading matrix manual (tanpa rotasi)
L <- matrix(nrow = nrow(pc$vectors), ncol = n_factors)
for (i in 1:n_factors) {
L[, i] <- sqrt(pc$values[i]) * pc$vectors[, i]
}
colnames(L) <- paste0("F", 1:n_factors)
rownames(L) <- colnames(scale_data)
print(L)
## F1 F2
## baseline.value 0.57135866 0.595402349
## accelerations -0.17261361 0.514376666
## fetal_movement -0.09798527 0.140154080
## uterine_contractions -0.32568814 0.004882082
## light_decelerations -0.70554855 0.048753424
## abnormal_short_term_variability 0.40094904 -0.140131940
## mean_value_of_short_term_variability -0.83151444 0.192772746
## percentage_of_time_with_abnormal_long_term_variability 0.61790739 -0.177394149
## mean_value_of_long_term_variability 0.01603154 0.077252497
## histogram_width -0.79625931 0.491805061
## histogram_min 0.84447813 -0.212326535
## histogram_max -0.33037512 0.744059693
## histogram_number_of_peaks -0.63908955 0.422932964
## histogram_mode 0.58342546 0.752377734
## histogram_mean 0.72236248 0.649147928
## histogram_median 0.63745430 0.748404882
## histogram_variance -0.76594155 0.295891375
## histogram_tendency 0.12676574 0.409261671
## F3 F4
## baseline.value -0.26375870 -0.138990823
## accelerations 0.37472880 -0.187635695
## fetal_movement -0.40286823 0.401115861
## uterine_contractions 0.36774666 -0.540203969
## light_decelerations -0.31188487 -0.240626229
## abnormal_short_term_variability -0.70229489 -0.148712961
## mean_value_of_short_term_variability 0.09956477 -0.054200717
## percentage_of_time_with_abnormal_long_term_variability -0.45885869 0.024935624
## mean_value_of_long_term_variability 0.45853952 0.677278380
## histogram_width -0.14828811 0.131922565
## histogram_min 0.14189807 -0.310027410
## histogram_max -0.07862107 -0.218578915
## histogram_number_of_peaks -0.23320263 0.172044272
## histogram_mode 0.03050269 -0.056181700
## histogram_mean 0.14795933 0.004349727
## histogram_median 0.04155348 -0.051809205
## histogram_variance -0.20992956 -0.186656015
## histogram_tendency -0.06856024 0.419653894
## F5
## baseline.value 0.229190530
## accelerations -0.500805248
## fetal_movement -0.553381269
## uterine_contractions 0.366544069
## light_decelerations 0.356648898
## abnormal_short_term_variability -0.048586643
## mean_value_of_short_term_variability 0.015088097
## percentage_of_time_with_abnormal_long_term_variability 0.149917538
## mean_value_of_long_term_variability 0.190216774
## histogram_width 0.000339990
## histogram_min -0.143593848
## histogram_max -0.236805177
## histogram_number_of_peaks -0.003638212
## histogram_mode 0.090086660
## histogram_mean -0.022810962
## histogram_median 0.040954386
## histogram_variance 0.071615789
## histogram_tendency 0.547261976
## F6
## baseline.value -0.178538786
## accelerations 0.408877800
## fetal_movement 0.131572532
## uterine_contractions -0.012175801
## light_decelerations 0.054383888
## abnormal_short_term_variability 0.019973264
## mean_value_of_short_term_variability -0.034337554
## percentage_of_time_with_abnormal_long_term_variability -0.160025726
## mean_value_of_long_term_variability -0.442960856
## histogram_width -0.099822409
## histogram_min -0.112960719
## histogram_max -0.396347570
## histogram_number_of_peaks -0.125166931
## histogram_mode 0.043731902
## histogram_mean -0.011392692
## histogram_median 0.018378355
## histogram_variance 0.004746554
## histogram_tendency 0.543635243
#---FA tanpa rotasi menggunakan `psych::fa`
fa_result <- fa(r = scale_data, covar = TRUE, nfactors = n_factors, rotate = "varimax", scores = "regression")
#---Ambil factor loadings
load_no_rotation <- fa_result$loadings
print(load_no_rotation)
##
## Loadings:
## MR1 MR2 MR3
## baseline.value 0.838 -0.267
## accelerations 0.169 0.205 0.955
## fetal_movement 0.166
## uterine_contractions 0.187 0.100
## light_decelerations 0.681 -0.246 -0.166
## abnormal_short_term_variability -0.222 0.123 -0.360
## mean_value_of_short_term_variability 0.719 -0.270 0.210
## percentage_of_time_with_abnormal_long_term_variability -0.424 0.214 -0.439
## mean_value_of_long_term_variability
## histogram_width 0.954 0.152
## histogram_min -0.848 0.320
## histogram_max 0.681 0.475 0.182
## histogram_number_of_peaks 0.740
## histogram_mode -0.106 0.930
## histogram_mean -0.310 0.911 0.150
## histogram_median -0.157 0.970 0.110
## histogram_variance 0.786 -0.135
## histogram_tendency 0.123 0.342
## MR4 MR5 MR6
## baseline.value 0.127
## accelerations 0.102
## fetal_movement 0.466
## uterine_contractions -0.593
## light_decelerations 0.273 -0.319 0.104
## abnormal_short_term_variability 0.442 0.355
## mean_value_of_short_term_variability -0.247
## percentage_of_time_with_abnormal_long_term_variability 0.198 0.255
## mean_value_of_long_term_variability -0.855
## histogram_width -0.122 0.116
## histogram_min 0.146 -0.117 -0.301
## histogram_max -0.508
## histogram_number_of_peaks 0.148
## histogram_mode 0.135
## histogram_mean -0.120
## histogram_median
## histogram_variance 0.163 -0.150
## histogram_tendency 0.744
##
## MR1 MR2 MR3 MR4 MR5 MR6
## SS loadings 4.710 4.048 1.519 1.170 1.018 0.959
## Proportion Var 0.262 0.225 0.084 0.065 0.057 0.053
## Cumulative Var 0.262 0.487 0.571 0.636 0.692 0.746
#---Visualisasi loading faktor tanpa rotasi
plot(load_no_rotation[, c(1,2)], type = "n", main = "Plot Faktor Tanpa Rotasi")
text(load_no_rotation[, c(1,2)], labels = rownames(load_no_rotation), cex = 0.7)
#---Diagram faktor (tanpa rotasi)
fa.diagram(fa_result)
#---Ambil skor faktornya
fa_scores <- as.data.frame(fa_result$scores)
colnames(fa_scores) <- paste0("F", 1:n_factors)
if (nrow(fa_scores) != nrow(data)) {
stop("Jumlah baris fa_scores dan data$fetal_health berbeda! Cek missing values di scale_data.")
}
#---Gabungkan kembali dengan target
data_fa <- cbind(fa_scores, fetal_health = data$fetal_health)
#---cek kelas
print(table(data_fa$fetal_health))
##
## 1 2 3
## 1646 292 175
#================================= 4. Klasifikasi LDA =======================================#
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(MVN)
## Warning: package 'MVN' was built under R version 4.4.3
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## ---
## biotools version 4.3
#---Uji Normalitas Multivariat per kelas (Mardia)
cat("\n=== Uji Normalitas Multivariat (Mardia) ===\n")
##
## === Uji Normalitas Multivariat (Mardia) ===
for (kelas in unique(data_fa$fetal_health)) {
cat("\nKelas:", kelas, "\n")
hasil_mvn <- mvn(data_fa[data_fa$fetal_health == kelas, -which(names(data_fa) == "fetal_health")],
mvnTest = "mardia", multivariatePlot = "none")
print(hasil_mvn$multivariateNormality)
}
##
## Kelas: 2
## Test Statistic p value Result
## 1 Mardia Skewness 1349.72566924878 1.90693331347686e-245 NO
## 2 Mardia Kurtosis 33.2641935630372 0 NO
## 3 MVN <NA> <NA> NO
##
## Kelas: 1
## Test Statistic p value Result
## 1 Mardia Skewness 1745.93628624914 0 NO
## 2 Mardia Kurtosis 4.20548023971821 2.60528352138412e-05 NO
## 3 MVN <NA> <NA> NO
##
## Kelas: 3
## Test Statistic p value Result
## 1 Mardia Skewness 506.064097862599 1.0165114564989e-73 NO
## 2 Mardia Kurtosis 4.59851389839783 4.25515332391768e-06 NO
## 3 MVN <NA> <NA> NO
#---Uji Homogenitas Varians-Kovarians (Box's M Test)
cat("\n=== Uji Homogenitas Varians-Kovarians (Box's M Test) ===\n")
##
## === Uji Homogenitas Varians-Kovarians (Box's M Test) ===
boxm_result <- boxM(data_fa[, -which(names(data_fa) == "fetal_health")], data_fa$fetal_health)
print(boxm_result)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data_fa[, -which(names(data_fa) == "fetal_health")]
## Chi-Sq (approx.) = 1715.4, df = 42, p-value < 2.2e-16
#---Pastikan labelnya bertipe faktor
data_fa$fetal_health <- as.factor(data_fa$fetal_health)
#---lda model
lda_model <- lda(fetal_health ~ ., data = data_fa)
#---uji signifikansi pakai wilks lambda
manova_lda <- manova(as.matrix(data_fa[, -which(names(data_fa) == "fetal_health")]) ~ data_fa$fetal_health)
summary_manova <- summary(manova_lda, test = "Wilks")
cat("\n=== Uji Signifikansi Model LDA (Wilks' Lambda) ===\n")
##
## === Uji Signifikansi Model LDA (Wilks' Lambda) ===
print(summary_manova)
## Df Wilks approx F num Df den Df Pr(>F)
## data_fa$fetal_health 2 0.45808 167.53 12 4210 < 2.2e-16 ***
## Residuals 2110
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#---Uji signifikansi Variabel (Koefisien Diskriminan)
print(round(lda_model$scaling, 4))
## LD1 LD2
## F1 -0.0962 0.4722
## F2 -0.1004 -0.9002
## F3 -0.9254 0.2128
## F4 0.6719 0.4047
## F5 0.7186 -0.2573
## F6 -0.3361 -0.1965
#---prediksi pada data yang sama
lda_pred <- predict(lda_model, newdata = data_fa)
#---confusion
confusion_matrix <- table(Predicted = lda_pred$class, Actual = data_fa$fetal_health)
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(confusion_matrix)
## Actual
## Predicted 1 2 3
## 1 1551 97 27
## 2 57 192 49
## 3 38 3 99
#---heatmap
conf_mat_table <- table(Predicted = lda_pred$class, Actual = data_fa$fetal_health)
conf_mat_df <- as.data.frame(conf_mat_table)
ggplot(conf_mat_df, aes(x = Actual, y = Predicted, fill = Freq)) +
geom_tile(color = "white") +
geom_text(aes(label = Freq), size = 5, color = "black") +
scale_fill_gradient(low = "lightblue", high = "red") +
labs(title = "Heatmap Confusion Matrix (LDA - In Sample)",
x = "Actual Class", y = "Predicted Class") +
theme_minimal()
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix) * 100
cat("Akurasi:", round(accuracy, 2), "%\n")
## Akurasi: 87.17 %
#===================================== 5. klasifikasi mlr =================================#
library(nnet)
## Warning: package 'nnet' was built under R version 4.4.3
library(caret)
library(ggplot2)
library(reshape2)
#---VIF
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
library(dplyr)
#---Buat model VIF hanya dari prediktor, tanpa pake target
predictors_only <- dplyr::select(data_fa, -fetal_health)
model_vif <- lm(rep(1, nrow(predictors_only)) ~ ., data = predictors_only)
vif_values <- car::vif(model_vif)
print("VIF antar prediktor:")
## [1] "VIF antar prediktor:"
print(vif_values)
## F1 F2 F3 F4 F5 F6
## 1.000918 1.001370 1.000896 1.002136 1.002308 1.002018
#---Klasikasi Multinomial Regresion
set.seed(123)
data_fa$fetal_health <- as.factor(data_fa$fetal_health)
#--Latih model multinomial logistic regression pada seluruh data
model_multi <- multinom(fetal_health ~ ., data = data_fa)
## # weights: 24 (14 variable)
## initial value 2321.367766
## iter 10 value 707.367031
## iter 20 value 636.637277
## final value 633.481883
## converged
#--Prediksi pada data yang sama (in-sample)
prediksi <- predict(model_multi, newdata = data_fa)
# --- Uji Serentak ---
model_null <- multinom(fetal_health ~ 1, data = data_fa)
## # weights: 6 (2 variable)
## initial value 2321.367766
## iter 10 value 1424.944851
## iter 10 value 1424.944848
## iter 10 value 1424.944848
## final value 1424.944848
## converged
lrt_stat <- 2 * (logLik(model_multi) - logLik(model_null))
df_diff <- attr(logLik(model_multi), "df") - attr(logLik(model_null), "df")
p_value <- pchisq(lrt_stat, df = df_diff, lower.tail = FALSE)
cat("Uji Serentak (Likelihood Ratio Test):\n")
## Uji Serentak (Likelihood Ratio Test):
cat("Statistik LRT =", round(lrt_stat, 3), "\n")
## Statistik LRT = 1582.926
cat("Derajat kebebasan =", df_diff, "\n")
## Derajat kebebasan = 12
cat("P-value =", p_value, "\n")
## P-value = 0
# --- Uji Parsial ---
summary_model <- summary(model_multi)
coefs <- summary_model$coefficients
std_err <- summary_model$standard.errors
z_values <- coefs / std_err
p_values <- 2 * (1 - pnorm(abs(z_values)))
cat("\nUji Parsial (Wald Test) untuk tiap koefisien:\n")
##
## Uji Parsial (Wald Test) untuk tiap koefisien:
print(round(p_values, 4))
## (Intercept) F1 F2 F3 F4 F5 F6
## 2 0 0 0 0 0.0025 0 0
## 3 0 0 0 0 0.0000 0 0
# Confusion Matrix
confusion_matrix <- table(Predicted = prediksi, Actual = data_fa$fetal_health)
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(confusion_matrix)
## Actual
## Predicted 1 2 3
## 1 1559 81 29
## 2 53 196 35
## 3 34 15 111
#--Visualisasi confusion matrix sebagai heatmap
cm_df <- as.data.frame(confusion_matrix)
colnames(cm_df) <- c("Predicted", "Actual", "Freq")
ggplot(data = cm_df, aes(x = Actual, y = Predicted, fill = Freq)) +
geom_tile(color = "white") +
geom_text(aes(label = Freq), vjust = 0.5, fontface = "bold", color = "black") +
scale_fill_gradient(low = "lightblue", high = "darkblue") +
labs(title = "Confusion Matrix (Multinomial Logistic Regression - In Sample)",
x = "Actual Label", y = "Predicted Label") +
theme_minimal()
#--Hitung akurasi
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
cat("Akurasi:", round(accuracy * 100, 2), "%\n")
## Akurasi: 88.31 %
#---Interpretasi menggunakan odds ratio
odds_ratios <- exp(coefs)
cat("\n=== Odds Ratio ===\n")
##
## === Odds Ratio ===
print(round(odds_ratios, 3))
## (Intercept) F1 F2 F3 F4 F5 F6
## 2 0.015 0.418 1.827 0.038 1.495 4.328 0.658
## 3 0.001 0.522 0.168 0.005 7.054 3.798 0.466
