1. Persiapan Data

# Load Library
library(psych)
library(corrplot)
## corrplot 0.95 loaded
library(FactoMineR)
library(factoextra)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(ggplot2)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Hmisc)
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(sjPlot)
## 
## Attaching package: 'sjPlot'
## The following object is masked from 'package:ggplot2':
## 
##     set_theme
library(openxlsx)

# Import the dataset from CSV into a data frame.
# NOTE(review): this is an absolute, machine-specific path; the script will
# fail on any machine where the file is not at this exact location.
data <- read.csv('D:/KULIYEAH/Semester 4/Analisis Multivariat/Dataset_Updated.csv')

# Print the data dimensions (number of rows, then number of columns).
cat("Dimensi data:", dim(data), "\n")
## Dimensi data: 1205 12
# Keep only the numeric columns.
# vapply() always yields a logical mask (sapply() can change its return type on
# unusual input), and drop = FALSE keeps the result a data frame even when
# exactly one column is numeric.
data_num <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]

# Cek Struktur
str(data_num)
## 'data.frame':    1205 obs. of  11 variables:
##  $ Age                   : int  22 22 27 20 20 22 20 23 22 26 ...
##  $ Systolic.BP           : int  90 110 110 100 90 120 110 110 90 110 ...
##  $ Diastolic             : int  60 70 70 70 60 70 70 80 60 70 ...
##  $ BS                    : num  9 7.1 7.5 7.2 7.5 7.01 9 7 6.4 12 ...
##  $ Body.Temp             : int  100 98 98 98 98 98 102 98 98 100 ...
##  $ BMI                   : num  18 20.4 23 21.2 19.7 24 17.6 21.3 22 30.2 ...
##  $ Previous.Complications: int  1 0 1 0 0 0 0 0 0 1 ...
##  $ Preexisting.Diabetes  : int  1 0 0 0 0 0 1 0 0 1 ...
##  $ Gestational.Diabetes  : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Mental.Health         : int  1 0 0 0 0 0 0 0 0 1 ...
##  $ Heart.Rate            : int  80 74 72 74 74 76 78 74 72 80 ...
# Statistik Deskriptif 
summary(data_num)
##       Age          Systolic.BP      Diastolic            BS        
##  Min.   : 10.00   Min.   : 70.0   Min.   : 40.00   Min.   : 3.000  
##  1st Qu.: 21.00   1st Qu.:100.0   1st Qu.: 65.00   1st Qu.: 6.000  
##  Median : 25.00   Median :120.0   Median : 80.00   Median : 6.900  
##  Mean   : 27.73   Mean   :116.8   Mean   : 77.17   Mean   : 7.501  
##  3rd Qu.: 32.00   3rd Qu.:130.0   3rd Qu.: 90.00   3rd Qu.: 7.900  
##  Max.   :325.00   Max.   :200.0   Max.   :140.00   Max.   :19.000  
##                   NA's   :5       NA's   :4        NA's   :2       
##    Body.Temp          BMI        Previous.Complications Preexisting.Diabetes
##  Min.   : 97.0   Min.   : 0.00   Min.   :0.0000         Min.   :0.0000      
##  1st Qu.: 98.0   1st Qu.:20.45   1st Qu.:0.0000         1st Qu.:0.0000      
##  Median : 98.0   Median :23.00   Median :0.0000         Median :0.0000      
##  Mean   : 98.4   Mean   :23.32   Mean   :0.1754         Mean   :0.2884      
##  3rd Qu.: 98.0   3rd Qu.:25.00   3rd Qu.:0.0000         3rd Qu.:1.0000      
##  Max.   :103.0   Max.   :37.00   Max.   :1.0000         Max.   :1.0000      
##                  NA's   :18      NA's   :2              NA's   :2           
##  Gestational.Diabetes Mental.Health      Heart.Rate   
##  Min.   :0.0000       Min.   :0.0000   Min.   :58.00  
##  1st Qu.:0.0000       1st Qu.:0.0000   1st Qu.:70.00  
##  Median :0.0000       Median :0.0000   Median :76.00  
##  Mean   :0.1178       Mean   :0.3344   Mean   :75.82  
##  3rd Qu.:0.0000       3rd Qu.:1.0000   3rd Qu.:80.00  
##  Max.   :1.0000       Max.   :1.0000   Max.   :92.00  
##                                        NA's   :2
describe(data_num)
## data_num 
## 
##  11  Variables      1205  Observations
## --------------------------------------------------------------------------------
## Age 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1205        0       43    0.998    27.73     26.5    10.19       17 
##      .10      .25      .50      .75      .90      .95 
##       18       21       25       32       39       48 
## 
## lowest :  10  12  13  15  16, highest:  55  60  63  65 325
## --------------------------------------------------------------------------------
## Systolic.BP 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1200        5       24    0.964    116.8      115    20.67       90 
##      .10      .25      .50      .75      .90      .95 
##       90      100      120      130      140      140 
## 
## lowest :  70  75  76  78  80, highest: 150 160 170 180 200
## --------------------------------------------------------------------------------
## Diastolic 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1201        4       21    0.975    77.17     76.5    16.07       60 
##      .10      .25      .50      .75      .90      .95 
##       60       65       80       90      100      100 
## 
## lowest :  40  45  49  50  55, highest: 100 110 120 130 140
## --------------------------------------------------------------------------------
## BS 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1203        2       84    0.998    7.501     6.95    3.061      3.9 
##      .10      .25      .50      .75      .90      .95 
##      4.1      6.0      6.9      7.9     11.0     15.0 
## 
## lowest : 3    3.01 3.3  3.4  3.5 , highest: 15   16   17   18   19  
## --------------------------------------------------------------------------------
## Body.Temp 
##        n  missing distinct     Info     Mean  pMedian      Gmd 
##     1205        0        7    0.375     98.4       98   0.7403 
##                                                     
## Value         97    98    99   100   101   102   103
## Frequency     12  1030     7    41    66    43     6
## Proportion 0.010 0.855 0.006 0.034 0.055 0.036 0.005
## --------------------------------------------------------------------------------
## BMI 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1187       18      157    0.998    23.32       23    4.282    18.00 
##      .10      .25      .50      .75      .90      .95 
##    18.90    20.45    23.00    25.00    29.00    30.27 
## 
## lowest : 0    15   15.5 15.6 15.9, highest: 34.5 35   35.1 36   37  
## --------------------------------------------------------------------------------
## Previous.Complications 
##        n  missing distinct     Info      Sum     Mean 
##     1203        2        2    0.434      211   0.1754 
## 
## --------------------------------------------------------------------------------
## Preexisting.Diabetes 
##        n  missing distinct     Info      Sum     Mean 
##     1203        2        2    0.616      347   0.2884 
## 
## --------------------------------------------------------------------------------
## Gestational.Diabetes 
##        n  missing distinct     Info      Sum     Mean 
##     1205        0        2    0.312      142   0.1178 
## 
## --------------------------------------------------------------------------------
## Mental.Health 
##        n  missing distinct     Info      Sum     Mean 
##     1205        0        2    0.668      403   0.3344 
## 
## --------------------------------------------------------------------------------
## Heart.Rate 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1203        2       31     0.99    75.82       75    8.032       66 
##      .10      .25      .50      .75      .90      .95 
##       70       70       76       80       88       90 
## 
## lowest : 58 60 62 64 65, highest: 87 88 89 90 92
## --------------------------------------------------------------------------------
# Cek Missing Value
colSums(is.na(data_num))
##                    Age            Systolic.BP              Diastolic 
##                      0                      5                      4 
##                     BS              Body.Temp                    BMI 
##                      2                      0                     18 
## Previous.Complications   Preexisting.Diabetes   Gestational.Diabetes 
##                      2                      2                      0 
##          Mental.Health             Heart.Rate 
##                      0                      2
# Handle missing values: each NA is replaced by the median of the non-missing
# entries of its own column.
# NOTE(review): the summary above shows Age max = 325 and BMI min = 0; such
# implausible values are not treated here and flow into every later step.
data_num1 <- data.frame(lapply(data_num, function(col) {
  replace(col, is.na(col), median(col, na.rm = TRUE))
}))

# Cek Missing Value Setelah Handling
colSums(is.na(data_num1))
##                    Age            Systolic.BP              Diastolic 
##                      0                      0                      0 
##                     BS              Body.Temp                    BMI 
##                      0                      0                      0 
## Previous.Complications   Preexisting.Diabetes   Gestational.Diabetes 
##                      0                      0                      0 
##          Mental.Health             Heart.Rate 
##                      0                      0
summary(data_num1)
##       Age          Systolic.BP      Diastolic            BS      
##  Min.   : 10.00   Min.   : 70.0   Min.   : 40.00   Min.   : 3.0  
##  1st Qu.: 21.00   1st Qu.:100.0   1st Qu.: 65.00   1st Qu.: 6.0  
##  Median : 25.00   Median :120.0   Median : 80.00   Median : 6.9  
##  Mean   : 27.73   Mean   :116.8   Mean   : 77.18   Mean   : 7.5  
##  3rd Qu.: 32.00   3rd Qu.:130.0   3rd Qu.: 90.00   3rd Qu.: 7.9  
##  Max.   :325.00   Max.   :200.0   Max.   :140.00   Max.   :19.0  
##    Body.Temp          BMI        Previous.Complications Preexisting.Diabetes
##  Min.   : 97.0   Min.   : 0.00   Min.   :0.0000         Min.   :0.000       
##  1st Qu.: 98.0   1st Qu.:21.00   1st Qu.:0.0000         1st Qu.:0.000       
##  Median : 98.0   Median :23.00   Median :0.0000         Median :0.000       
##  Mean   : 98.4   Mean   :23.31   Mean   :0.1751         Mean   :0.288       
##  3rd Qu.: 98.0   3rd Qu.:25.00   3rd Qu.:0.0000         3rd Qu.:1.000       
##  Max.   :103.0   Max.   :37.00   Max.   :1.0000         Max.   :1.000       
##  Gestational.Diabetes Mental.Health      Heart.Rate   
##  Min.   :0.0000       Min.   :0.0000   Min.   :58.00  
##  1st Qu.:0.0000       1st Qu.:0.0000   1st Qu.:70.00  
##  Median :0.0000       Median :0.0000   Median :76.00  
##  Mean   :0.1178       Mean   :0.3344   Mean   :75.82  
##  3rd Qu.:0.0000       3rd Qu.:1.0000   3rd Qu.:80.00  
##  Max.   :1.0000       Max.   :1.0000   Max.   :92.00
describe(data_num1)
## data_num1 
## 
##  11  Variables      1205  Observations
## --------------------------------------------------------------------------------
## Age 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1205        0       43    0.998    27.73     26.5    10.19       17 
##      .10      .25      .50      .75      .90      .95 
##       18       21       25       32       39       48 
## 
## lowest :  10  12  13  15  16, highest:  55  60  63  65 325
## --------------------------------------------------------------------------------
## Systolic.BP 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1205        0       24    0.964    116.8      115    20.62       90 
##      .10      .25      .50      .75      .90      .95 
##       90      100      120      130      140      140 
## 
## lowest :  70  75  76  78  80, highest: 150 160 170 180 200
## --------------------------------------------------------------------------------
## Diastolic 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1205        0       21    0.975    77.18     76.5    16.04       60 
##      .10      .25      .50      .75      .90      .95 
##       60       65       80       90      100      100 
## 
## lowest :  40  45  49  50  55, highest: 100 110 120 130 140
## --------------------------------------------------------------------------------
## BS 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1205        0       84    0.998      7.5     6.95    3.057      3.9 
##      .10      .25      .50      .75      .90      .95 
##      4.1      6.0      6.9      7.9     11.0     15.0 
## 
## lowest : 3    3.01 3.3  3.4  3.5 , highest: 15   16   17   18   19  
## --------------------------------------------------------------------------------
## Body.Temp 
##        n  missing distinct     Info     Mean  pMedian      Gmd 
##     1205        0        7    0.375     98.4       98   0.7403 
##                                                     
## Value         97    98    99   100   101   102   103
## Frequency     12  1030     7    41    66    43     6
## Proportion 0.010 0.855 0.006 0.034 0.055 0.036 0.005
## --------------------------------------------------------------------------------
## BMI 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1205        0      157    0.997    23.31       23    4.243    18.00 
##      .10      .25      .50      .75      .90      .95 
##    18.90    21.00    23.00    25.00    28.96    30.20 
## 
## lowest : 0    15   15.5 15.6 15.9, highest: 34.5 35   35.1 36   37  
## --------------------------------------------------------------------------------
## Previous.Complications 
##        n  missing distinct     Info      Sum     Mean 
##     1205        0        2    0.433      211   0.1751 
## 
## --------------------------------------------------------------------------------
## Preexisting.Diabetes 
##        n  missing distinct     Info      Sum     Mean 
##     1205        0        2    0.615      347    0.288 
## 
## --------------------------------------------------------------------------------
## Gestational.Diabetes 
##        n  missing distinct     Info      Sum     Mean 
##     1205        0        2    0.312      142   0.1178 
## 
## --------------------------------------------------------------------------------
## Mental.Health 
##        n  missing distinct     Info      Sum     Mean 
##     1205        0        2    0.668      403   0.3344 
## 
## --------------------------------------------------------------------------------
## Heart.Rate 
##        n  missing distinct     Info     Mean  pMedian      Gmd      .05 
##     1205        0       31     0.99    75.82       75    8.025       66 
##      .10      .25      .50      .75      .90      .95 
##       70       70       76       80       88       90 
## 
## lowest : 58 60 62 64 65, highest: 87 88 89 90 92
## --------------------------------------------------------------------------------
# Descriptive statistics for the export table.
# The psych:: prefix is required: Hmisc is attached after psych and masks
# describe(). The Hmisc version returns a per-variable list without
# mean/median/sd/min/max columns, so desc$mean etc. would be NULL and the
# exported table would come out empty.
desc <- psych::describe(data_num1)

# Collect the key statistics into one table (one row per variable)
desc_table <- data.frame(
  Variable = rownames(desc),
  Mean = round(as.numeric(desc$mean), 3),
  Median = round(as.numeric(desc$median), 3),
  SD = round(as.numeric(desc$sd), 3),
  Min = as.numeric(desc$min),
  Max = as.numeric(desc$max)
)

# Write the table to an Excel file in the current working directory
write.xlsx(desc_table, "tabel_deskriptif.xlsx", rowNames = FALSE)

# Distribution of every variable: one histogram facet per variable, each with
# its own axis scales. stack() reshapes the data frame into long form with the
# columns `values` and `ind` used below.
data_long <- stack(data_num1)

ggplot(data = data_long, mapping = aes(x = values)) +
  geom_histogram(fill = "skyblue", color = "black", bins = 30) +
  theme_minimal() +
  facet_wrap(~ ind, scales = "free")

# Long form (one row per variable/value pair) for a combined boxplot
data_long <- pivot_longer(
  data_num1,
  cols = everything(),
  names_to = "Variable",
  values_to = "Value"
)

# Side-by-side boxplots of all variables on their original scales
ggplot(data = data_long, mapping = aes(x = Variable, y = Value)) +
  geom_boxplot(alpha = 0.7, fill = "#69b3a2") +
  labs(
    title = "Boxplot Seluruh Variabel Numerik",
    x = "Variabel",
    y = "Nilai"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Standardize every variable (center to mean 0, scale to sd 1)
data_scaled <- scale(data_num1)

# Long form of the standardized values for plotting
data_scaled_long <- pivot_longer(
  as.data.frame(data_scaled),
  cols = everything(),
  names_to = "Variable",
  values_to = "Value"
)

# Boxplots after standardization, so all variables share the z-score scale
ggplot(data = data_scaled_long, mapping = aes(x = Variable, y = Value)) +
  geom_boxplot(alpha = 0.7, fill = "#E69F00") +
  labs(
    title = "Boxplot Setelah Standarisasi",
    y = "Z-Score"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2. Uji Asumsi

2a. Uji Korelasi

cor_matrix <- cor(data_num1, method = "pearson")
cor_matrix
##                                Age Systolic.BP  Diastolic        BS   Body.Temp
## Age                     1.00000000   0.1598061  0.1558714 0.4306982 -0.08900263
## Systolic.BP             0.15980605   1.0000000  0.7931054 0.3420373 -0.17810105
## Diastolic               0.15587136   0.7931054  1.0000000 0.3613747 -0.11861485
## BS                      0.43069816   0.3420373  0.3613747 1.0000000  0.08005480
## Body.Temp              -0.08900263  -0.1781011 -0.1186149 0.0800548  1.00000000
## BMI                     0.23519153   0.2850881  0.2707933 0.4913860  0.02153467
## Previous.Complications  0.14276178   0.1331392  0.1822971 0.4221103  0.11335281
## Preexisting.Diabetes    0.17749855   0.1649305  0.1929384 0.5487575  0.06846207
## Gestational.Diabetes   -0.04996601   0.1378353  0.1570357 0.1129843  0.15565044
## Mental.Health           0.07657281   0.1844298  0.1954217 0.3812072  0.13496110
## Heart.Rate              0.06289193   0.1562080  0.1667512 0.2981745  0.09797216
##                               BMI Previous.Complications Preexisting.Diabetes
## Age                    0.23519153              0.1427618           0.17749855
## Systolic.BP            0.28508814              0.1331392           0.16493053
## Diastolic              0.27079328              0.1822971           0.19293842
## BS                     0.49138603              0.4221103           0.54875755
## Body.Temp              0.02153467              0.1133528           0.06846207
## BMI                    1.00000000              0.3696315           0.29121565
## Previous.Complications 0.36963145              1.0000000           0.36763890
## Preexisting.Diabetes   0.29121565              0.3676389           1.00000000
## Gestational.Diabetes   0.31234936              0.2108601          -0.04485550
## Mental.Health          0.32081732              0.4509411           0.43098666
## Heart.Rate             0.21483602              0.2769475           0.37382313
##                        Gestational.Diabetes Mental.Health Heart.Rate
## Age                             -0.04996601    0.07657281 0.06289193
## Systolic.BP                      0.13783532    0.18442979 0.15620798
## Diastolic                        0.15703567    0.19542169 0.16675118
## BS                               0.11298426    0.38120718 0.29817446
## Body.Temp                        0.15565044    0.13496110 0.09797216
## BMI                              0.31234936    0.32081732 0.21483602
## Previous.Complications           0.21086012    0.45094115 0.27694750
## Preexisting.Diabetes            -0.04485550    0.43098666 0.37382313
## Gestational.Diabetes             1.00000000    0.24282292 0.18717567
## Mental.Health                    0.24282292    1.00000000 0.32594303
## Heart.Rate                       0.18717567    0.32594303 1.00000000
cor_test <- rcorr(as.matrix(data_num1))

cor_test$r
##                                Age Systolic.BP  Diastolic        BS   Body.Temp
## Age                     1.00000000   0.1598061  0.1558714 0.4306982 -0.08900263
## Systolic.BP             0.15980605   1.0000000  0.7931054 0.3420373 -0.17810105
## Diastolic               0.15587136   0.7931054  1.0000000 0.3613747 -0.11861485
## BS                      0.43069816   0.3420373  0.3613747 1.0000000  0.08005480
## Body.Temp              -0.08900263  -0.1781011 -0.1186149 0.0800548  1.00000000
## BMI                     0.23519153   0.2850881  0.2707933 0.4913860  0.02153467
## Previous.Complications  0.14276178   0.1331392  0.1822971 0.4221103  0.11335281
## Preexisting.Diabetes    0.17749855   0.1649305  0.1929384 0.5487575  0.06846207
## Gestational.Diabetes   -0.04996601   0.1378353  0.1570357 0.1129843  0.15565044
## Mental.Health           0.07657281   0.1844298  0.1954217 0.3812072  0.13496110
## Heart.Rate              0.06289193   0.1562080  0.1667512 0.2981745  0.09797216
##                               BMI Previous.Complications Preexisting.Diabetes
## Age                    0.23519153              0.1427618           0.17749855
## Systolic.BP            0.28508814              0.1331392           0.16493053
## Diastolic              0.27079328              0.1822971           0.19293842
## BS                     0.49138603              0.4221103           0.54875755
## Body.Temp              0.02153467              0.1133528           0.06846207
## BMI                    1.00000000              0.3696315           0.29121565
## Previous.Complications 0.36963145              1.0000000           0.36763890
## Preexisting.Diabetes   0.29121565              0.3676389           1.00000000
## Gestational.Diabetes   0.31234936              0.2108601          -0.04485550
## Mental.Health          0.32081732              0.4509411           0.43098666
## Heart.Rate             0.21483602              0.2769475           0.37382313
##                        Gestational.Diabetes Mental.Health Heart.Rate
## Age                             -0.04996601    0.07657281 0.06289193
## Systolic.BP                      0.13783532    0.18442979 0.15620798
## Diastolic                        0.15703567    0.19542169 0.16675118
## BS                               0.11298426    0.38120718 0.29817446
## Body.Temp                        0.15565044    0.13496110 0.09797216
## BMI                              0.31234936    0.32081732 0.21483602
## Previous.Complications           0.21086012    0.45094115 0.27694750
## Preexisting.Diabetes            -0.04485550    0.43098666 0.37382313
## Gestational.Diabetes             1.00000000    0.24282292 0.18717567
## Mental.Health                    0.24282292    1.00000000 0.32594303
## Heart.Rate                       0.18717567    0.32594303 1.00000000
cor_test$P
##                                 Age  Systolic.BP    Diastolic           BS
## Age                              NA 2.440158e-08 5.375017e-08 0.0000000000
## Systolic.BP            2.440158e-08           NA 0.000000e+00 0.0000000000
## Diastolic              5.375017e-08 0.000000e+00           NA 0.0000000000
## BS                     0.000000e+00 0.000000e+00 0.000000e+00           NA
## Body.Temp              1.984847e-03 4.789085e-10 3.662551e-05 0.0054270170
## BMI                    0.000000e+00 0.000000e+00 0.000000e+00 0.0000000000
## Previous.Complications 6.488799e-07 3.525848e-06 1.829386e-10 0.0000000000
## Preexisting.Diabetes   5.488454e-10 8.471870e-09 1.437250e-11 0.0000000000
## Gestational.Diabetes   8.296097e-02 1.565660e-06 4.263626e-08 0.0000847495
## Mental.Health          7.832021e-03 1.111871e-10 7.769785e-12 0.0000000000
## Heart.Rate             2.903066e-02 5.027717e-08 5.771037e-09 0.0000000000
##                           Body.Temp          BMI Previous.Complications
## Age                    1.984847e-03 0.000000e+00           6.488799e-07
## Systolic.BP            4.789085e-10 0.000000e+00           3.525848e-06
## Diastolic              3.662551e-05 0.000000e+00           1.829386e-10
## BS                     5.427017e-03 0.000000e+00           0.000000e+00
## Body.Temp                        NA 4.551563e-01           8.031419e-05
## BMI                    4.551563e-01           NA           0.000000e+00
## Previous.Complications 8.031419e-05 0.000000e+00                     NA
## Preexisting.Diabetes   1.746100e-02 0.000000e+00           0.000000e+00
## Gestational.Diabetes   5.615451e-08 0.000000e+00           1.412204e-13
## Mental.Health          2.581523e-06 0.000000e+00           0.000000e+00
## Heart.Rate             6.602262e-04 4.773959e-14           0.000000e+00
##                        Preexisting.Diabetes Gestational.Diabetes Mental.Health
## Age                            5.488454e-10         8.296097e-02  7.832021e-03
## Systolic.BP                    8.471870e-09         1.565660e-06  1.111871e-10
## Diastolic                      1.437250e-11         4.263626e-08  7.769785e-12
## BS                             0.000000e+00         8.474950e-05  0.000000e+00
## Body.Temp                      1.746100e-02         5.615451e-08  2.581523e-06
## BMI                            0.000000e+00         0.000000e+00  0.000000e+00
## Previous.Complications         0.000000e+00         1.412204e-13  0.000000e+00
## Preexisting.Diabetes                     NA         1.196508e-01  0.000000e+00
## Gestational.Diabetes           1.196508e-01                   NA  0.000000e+00
## Mental.Health                  0.000000e+00         0.000000e+00            NA
## Heart.Rate                     0.000000e+00         5.805356e-11  0.000000e+00
##                          Heart.Rate
## Age                    2.903066e-02
## Systolic.BP            5.027717e-08
## Diastolic              5.771037e-09
## BS                     0.000000e+00
## Body.Temp              6.602262e-04
## BMI                    4.773959e-14
## Previous.Complications 0.000000e+00
## Preexisting.Diabetes   0.000000e+00
## Gestational.Diabetes   5.805356e-11
## Mental.Health          0.000000e+00
## Heart.Rate                       NA
# Correlation heatmap of the coefficient matrix returned by rcorr(): full
# square matrix, each cell drawn as a square with its coefficient printed in
# black, and variable labels rotated 45 degrees.
corrplot(cor_test$r,
         method = "square", 
         type = "full",
         addCoef.col = "black", 
         number.cex = 0.3,
         tl.col = "black",
         tl.cex = 0.6, 
         tl.srt = 45)

tab_corr(data_num1, digits = 3, file = "tabel_korelasi.doc") 
  Age Systolic.BP Diastolic BS Body.Temp BMI Previous.Complications Preexisting.Diabetes Gestational.Diabetes Mental.Health Heart.Rate
Age   0.160*** 0.156*** 0.431*** -0.089** 0.235*** 0.143*** 0.177*** -0.050 0.077** 0.063*
Systolic.BP 0.160***   0.793*** 0.342*** -0.178*** 0.285*** 0.133*** 0.165*** 0.138*** 0.184*** 0.156***
Diastolic 0.156*** 0.793***   0.361*** -0.119*** 0.271*** 0.182*** 0.193*** 0.157*** 0.195*** 0.167***
BS 0.431*** 0.342*** 0.361***   0.080** 0.491*** 0.422*** 0.549*** 0.113*** 0.381*** 0.298***
Body.Temp -0.089** -0.178*** -0.119*** 0.080**   0.022 0.113*** 0.068* 0.156*** 0.135*** 0.098***
BMI 0.235*** 0.285*** 0.271*** 0.491*** 0.022   0.370*** 0.291*** 0.312*** 0.321*** 0.215***
Previous.Complications 0.143*** 0.133*** 0.182*** 0.422*** 0.113*** 0.370***   0.368*** 0.211*** 0.451*** 0.277***
Preexisting.Diabetes 0.177*** 0.165*** 0.193*** 0.549*** 0.068* 0.291*** 0.368***   -0.045 0.431*** 0.374***
Gestational.Diabetes -0.050 0.138*** 0.157*** 0.113*** 0.156*** 0.312*** 0.211*** -0.045   0.243*** 0.187***
Mental.Health 0.077** 0.184*** 0.195*** 0.381*** 0.135*** 0.321*** 0.451*** 0.431*** 0.243***   0.326***
Heart.Rate 0.063* 0.156*** 0.167*** 0.298*** 0.098*** 0.215*** 0.277*** 0.374*** 0.187*** 0.326***  
Computed correlation used pearson-method with listwise-deletion.

2b. Uji Bartlett

# Bartlett's test on the correlation matrix of the imputed data, using the
# number of rows of data_num1 as the sample size
cor_matrix <- cor(x = data_num1)
bartlett_test <- cortest.bartlett(R = cor_matrix, n = nrow(data_num1))

bartlett_test
## $chisq
## [1] 3861.738
## 
## $p.value
## [1] 0
## 
## $df
## [1] 55

2c. Uji MSA

# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per variable),
# computed from the correlation matrix of the imputed data
cor_matrix <- cor(x = data_num1)
kmo_result <- KMO(r = cor_matrix)

kmo_result
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor_matrix)
## Overall MSA =  0.74
## MSA for each item = 
##                    Age            Systolic.BP              Diastolic 
##                   0.68                   0.62                   0.64 
##                     BS              Body.Temp                    BMI 
##                   0.78                   0.62                   0.84 
## Previous.Complications   Preexisting.Diabetes   Gestational.Diabetes 
##                   0.87                   0.75                   0.61 
##          Mental.Health             Heart.Rate 
##                   0.84                   0.86

3. Principal Component Analysis (PCA)

3a. Total Variance Explained

# 1. Unrotated PCA; scale. = TRUE standardizes the variables first
pca_result <- prcomp(data_num1, scale. = TRUE)

# Eigenvalues are the squared component standard deviations
eigenvalues <- pca_result$sdev^2

# Share of total variance per component, and its running total
prop_var <- prop.table(eigenvalues)
cum_var <- cumsum(prop_var)

# Total Variance Explained table
total_variance <- data.frame(
  Component = seq_along(eigenvalues),
  Eigenvalue = eigenvalues,
  Proportion_Variance = prop_var,
  Cumulative_Variance = cum_var
)

round(total_variance, 4)
##    Component Eigenvalue Proportion_Variance Cumulative_Variance
## 1          1     3.4778              0.3162              0.3162
## 2          2     1.6141              0.1467              0.4629
## 3          3     1.2551              0.1141              0.5770
## 4          4     0.9735              0.0885              0.6655
## 5          5     0.8201              0.0746              0.7401
## 6          6     0.7345              0.0668              0.8068
## 7          7     0.6005              0.0546              0.8614
## 8          8     0.5484              0.0499              0.9113
## 9          9     0.4517              0.0411              0.9523
## 10        10     0.3227              0.0293              0.9817
## 11        11     0.2016              0.0183              1.0000
# Express both variance columns as percentages, then show the table rounded to
# two decimals
total_variance <- transform(
  total_variance,
  Proportion_Variance = Proportion_Variance * 100,
  Cumulative_Variance = Cumulative_Variance * 100
)

round(total_variance, 2)
##    Component Eigenvalue Proportion_Variance Cumulative_Variance
## 1          1       3.48               31.62               31.62
## 2          2       1.61               14.67               46.29
## 3          3       1.26               11.41               57.70
## 4          4       0.97                8.85               66.55
## 5          5       0.82                7.46               74.01
## 6          6       0.73                6.68               80.68
## 7          7       0.60                5.46               86.14
## 8          8       0.55                4.99               91.13
## 9          9       0.45                4.11               95.23
## 10        10       0.32                2.93               98.17
## 11        11       0.20                1.83              100.00
# 2. PCA summary laid out like the SPSS "Total Variance Explained" table
pca_result <- prcomp(data_num1, scale. = TRUE)

# Eigenvalues (squared component standard deviations)
eigenvalues <- pca_result$sdev^2

# Percentage of total variance per component
percent_var <- eigenvalues / sum(eigenvalues) * 100

# Cumulative percentage
cumulative_var <- cumsum(percent_var)

# check.names = FALSE keeps the headers exactly as written. By default
# data.frame() rewrites non-syntactic names, turning them into
# "Persen.Varians" and "Kumulatif..". These two columns must then be accessed
# with backticks or [[ ]].
# NOTE: any printed output captured below this block predates the change and
# shows the mangled headers until the document is re-knit.
total_variance <- data.frame(
  Komponen = seq_along(eigenvalues),
  Total = round(eigenvalues, 2),
  `Persen Varians` = round(percent_var, 2),
  `Kumulatif %` = round(cumulative_var, 2),
  check.names = FALSE
)

total_variance
##    Komponen Total Persen.Varians Kumulatif..
## 1         1  3.48          31.62       31.62
## 2         2  1.61          14.67       46.29
## 3         3  1.26          11.41       57.70
## 4         4  0.97           8.85       66.55
## 5         5  0.82           7.46       74.01
## 6         6  0.73           6.68       80.68
## 7         7  0.60           5.46       86.14
## 8         8  0.55           4.99       91.13
## 9         9  0.45           4.11       95.23
## 10       10  0.32           2.93       98.17
## 11       11  0.20           1.83      100.00

3b. Scree Plot

# Eigenvalues (squared component standard deviations) from the fitted PCA
eigenvalues <- pca_result$sdev^2

# Scree plot: eigenvalue against component index, y-axis starting at zero
plot(
  eigenvalues,
  main = "Scree Plot PCA",
  xlab = "Komponen",
  ylab = "Eigenvalue",
  ylim = c(0, max(eigenvalues)),
  type = "b",
  pch = 19
)

# Dashed red reference line at eigenvalue = 1 (the retention cut-off used later)
abline(h = 1, lwd = 2, lty = 2, col = "red")

3c. Communalities

# Number of retained components: those with eigenvalue > 1
eigenvalues <- pca_result$sdev^2
k <- sum(eigenvalues > 1)

# Refit the PCA with the same settings (prcomp() always returns every
# component; the first k are selected below)
pca_k <- prcomp(data_num1, scale. = TRUE)

# prcomp()$rotation holds unit-length eigenvectors, not loadings. Component
# loadings are the eigenvectors scaled by the component standard deviations
# (the square roots of the eigenvalues). Squaring the raw eigenvectors, as the
# previous version did, understates every communality.
# seq_len() and drop = FALSE keep this a matrix even when k is 0 or 1.
loadings <- sweep(
  pca_k$rotation[, seq_len(k), drop = FALSE],
  2,
  pca_k$sdev[seq_len(k)],
  `*`
)

# Communality of a variable = sum of its squared loadings on the kept components
communalities <- rowSums(loadings^2)

# Communalities table (Initial is 1 for every variable)
# NOTE: any printed output captured below this block predates the fix and must
# be regenerated by re-running / re-knitting.
communalities_table <- data.frame(
  Variabel = rownames(loadings),
  Initial = 1,
  Extraction = round(communalities, 3)
)

communalities_table
##                                      Variabel Initial Extraction
## Age                                       Age       1      0.312
## Systolic.BP                       Systolic.BP       1      0.423
## Diastolic                           Diastolic       1      0.397
## BS                                         BS       1      0.251
## Body.Temp                           Body.Temp       1      0.259
## BMI                                       BMI       1      0.132
## Previous.Complications Previous.Complications       1      0.178
## Preexisting.Diabetes     Preexisting.Diabetes       1      0.272
## Gestational.Diabetes     Gestational.Diabetes       1      0.457
## Mental.Health                   Mental.Health       1      0.194
## Heart.Rate                         Heart.Rate       1      0.124
# Retain the components whose eigenvalue exceeds 1
eigenvalues <- pca_result[["sdev"]]^2
k <- sum(eigenvalues > 1)

# psych::principal() reports the communalities directly
library(psych)

pca_psych <- principal(
  r = data_num1,
  nfactors = k,
  rotate = "none"
)

# Extraction communalities, one value per variable
communalities <- pca_psych[["communality"]]

# Two-column table: variable name and its communality to six decimals
comm_table <- data.frame(
  Variabel = names(communalities),
  Extraction = round(communalities, digits = 6)
)

comm_table
##                                      Variabel Extraction
## Age                                       Age   0.493461
## Systolic.BP                       Systolic.BP   0.838861
## Diastolic                           Diastolic   0.809065
## BS                                         BS   0.713695
## Body.Temp                           Body.Temp   0.406529
## BMI                                       BMI   0.446322
## Previous.Complications Previous.Complications   0.500155
## Preexisting.Diabetes     Preexisting.Diabetes   0.616487
## Gestational.Diabetes     Gestational.Diabetes   0.650558
## Mental.Health                   Mental.Health   0.529467
## Heart.Rate                         Heart.Rate   0.342479

3d. Pengelompokan faktor-faktor utama

# Count the eigenvalues of the correlation matrix that exceed 1
ev <- eigen(cor(data_num1))[["values"]]
k <- sum(ev > 1)

# Principal components with varimax rotation; component scores requested
pca_rot <- principal(
  r = data_num1,
  nfactors = k,
  rotate = "varimax",
  scores = TRUE
)

# Show the rotated loadings
print(pca_rot[["loadings"]])
## 
## Loadings:
##                        RC1    RC2    RC3   
## Age                     0.301  0.179 -0.609
## Systolic.BP             0.149  0.904       
## Diastolic               0.191  0.879       
## BS                      0.731  0.280 -0.318
## Body.Temp               0.317 -0.384  0.398
## BMI                     0.580  0.326       
## Previous.Complications  0.702              
## Preexisting.Diabetes    0.716        -0.321
## Gestational.Diabetes    0.287  0.236  0.716
## Mental.Health           0.705         0.168
## Heart.Rate              0.569         0.124
## 
##                  RC1   RC2   RC3
## SS loadings    3.029 2.018 1.300
## Proportion Var 0.275 0.183 0.118
## Cumulative Var 0.275 0.459 0.577
# Strip the "loadings" class so the rotated loadings become a plain
# data frame (one row per variable, one column per component)
loadings_matrix <- as.data.frame(unclass(pca_rot[["loadings"]]))

# Display to three decimals (the stored object is left unrounded)
round(loadings_matrix, digits = 3)
##                          RC1    RC2    RC3
## Age                    0.301  0.179 -0.609
## Systolic.BP            0.149  0.904 -0.017
## Diastolic              0.191  0.879  0.009
## BS                     0.731  0.280 -0.318
## Body.Temp              0.317 -0.384  0.398
## BMI                    0.580  0.326  0.058
## Previous.Complications 0.702  0.043  0.075
## Preexisting.Diabetes   0.716  0.002 -0.321
## Gestational.Diabetes   0.287  0.236  0.716
## Mental.Health          0.705  0.065  0.168
## Heart.Rate             0.569  0.053  0.124
# For every variable (row), index of the component with the largest
# absolute loading
grouping <- apply(
  X = abs(loadings_matrix),
  MARGIN = 1,
  FUN = which.max
)

# Grouping table: variable name next to its dominant component
hasil_kelompok <- data.frame(
  Variabel = row.names(loadings_matrix),
  Komponen_Dominan = paste0("Komponen ", grouping)
)

hasil_kelompok
##                  Variabel Komponen_Dominan
## 1                     Age       Komponen 3
## 2             Systolic.BP       Komponen 2
## 3               Diastolic       Komponen 2
## 4                      BS       Komponen 1
## 5               Body.Temp       Komponen 3
## 6                     BMI       Komponen 1
## 7  Previous.Complications       Komponen 1
## 8    Preexisting.Diabetes       Komponen 1
## 9    Gestational.Diabetes       Komponen 3
## 10          Mental.Health       Komponen 1
## 11             Heart.Rate       Komponen 1
# Rotated loadings as a data frame, rounded to three decimals
loadings_matrix <- round(
  as.data.frame(unclass(pca_rot[["loadings"]])),
  digits = 3
)

# Relabel the columns "Komponen 1", "Komponen 2", ...
names(loadings_matrix) <- paste0(
  "Komponen ",
  seq_len(ncol(loadings_matrix))
)

# Carry the variable names as a regular column and move it to the front
loadings_matrix$Variabel <- row.names(loadings_matrix)
loadings_matrix <- loadings_matrix[
  ,
  c("Variabel", setdiff(names(loadings_matrix), "Variabel"))
]

loadings_matrix
##                                      Variabel Komponen 1 Komponen 2 Komponen 3
## Age                                       Age      0.301      0.179     -0.609
## Systolic.BP                       Systolic.BP      0.149      0.904     -0.017
## Diastolic                           Diastolic      0.191      0.879      0.009
## BS                                         BS      0.731      0.280     -0.318
## Body.Temp                           Body.Temp      0.317     -0.384      0.398
## BMI                                       BMI      0.580      0.326      0.058
## Previous.Complications Previous.Complications      0.702      0.043      0.075
## Preexisting.Diabetes     Preexisting.Diabetes      0.716      0.002     -0.321
## Gestational.Diabetes     Gestational.Diabetes      0.287      0.236      0.716
## Mental.Health                   Mental.Health      0.705      0.065      0.168
## Heart.Rate                         Heart.Rate      0.569      0.053      0.124
# Index of the largest absolute loading per row; the first column
# (Variabel) is excluded so only the component columns are compared
dominant <- apply(
  X = abs(loadings_matrix[, -1]),
  MARGIN = 1,
  FUN = which.max
)

# Store the dominant component label next to the loadings
loadings_matrix[["Dominan"]] <- paste0("Komponen ", dominant)

loadings_matrix
##                                      Variabel Komponen 1 Komponen 2 Komponen 3
## Age                                       Age      0.301      0.179     -0.609
## Systolic.BP                       Systolic.BP      0.149      0.904     -0.017
## Diastolic                           Diastolic      0.191      0.879      0.009
## BS                                         BS      0.731      0.280     -0.318
## Body.Temp                           Body.Temp      0.317     -0.384      0.398
## BMI                                       BMI      0.580      0.326      0.058
## Previous.Complications Previous.Complications      0.702      0.043      0.075
## Preexisting.Diabetes     Preexisting.Diabetes      0.716      0.002     -0.321
## Gestational.Diabetes     Gestational.Diabetes      0.287      0.236      0.716
## Mental.Health                   Mental.Health      0.705      0.065      0.168
## Heart.Rate                         Heart.Rate      0.569      0.053      0.124
##                           Dominan
## Age                    Komponen 3
## Systolic.BP            Komponen 2
## Diastolic              Komponen 2
## BS                     Komponen 1
## Body.Temp              Komponen 3
## BMI                    Komponen 1
## Previous.Complications Komponen 1
## Preexisting.Diabetes   Komponen 1
## Gestational.Diabetes   Komponen 3
## Mental.Health          Komponen 1
## Heart.Rate             Komponen 1

3e. Matriks Transformasi Komponen

# Varimax-rotated principal components with three components requested.
# NOTE(review): this reassigns pca_result, which earlier in the file holds
# the prcomp() fit, and hard-codes 3 rather than using k -- confirm nothing
# after this point still expects pca_result$sdev.
pca_result <- principal(
  r = data_num1,
  nfactors = 3,
  rotate = "varimax"
)
# Component transformation (rotation) matrix
pca_result[["rot.mat"]]
##            [,1]       [,2]        [,3]
## [1,]  0.8728880 -0.4853979 -0.04955209
## [2,]  0.4728276  0.8164489  0.33142911
## [3,] -0.1204182 -0.3127301  0.94217797
# Same matrix, three decimals
round(pca_result[["rot.mat"]], digits = 3)
##        [,1]   [,2]   [,3]
## [1,]  0.873 -0.485 -0.050
## [2,]  0.473  0.816  0.331
## [3,] -0.120 -0.313  0.942
# Orthogonality check: t(R) %*% R should be the identity matrix
round(crossprod(pca_result[["rot.mat"]]), digits = 3)
##      [,1] [,2] [,3]
## [1,]    1    0    0
## [2,]    0    1    0
## [3,]    0    0    1

4. Factor Analysis (FA)

4a. Estimasi Matriks Faktor

# 1. Compute the eigenvalues
# Work on the scaled data and decompose its correlation matrix
df2 <- data_scaled
cor_matrix <- cor(x = df2)
eigen_result <- eigen(x = cor_matrix)
eigenvalues <- eigen_result[["values"]]
print(eigenvalues)
##  [1] 3.4778297 1.6141165 1.2551309 0.9734905 0.8200721 0.7344577 0.6005218
##  [8] 0.5484470 0.4516542 0.3226874 0.2015922
# Eigenvalue table: value, percent of variance, cumulative percent.
# NOTE(review): the object name `eigen` is the same as base::eigen();
# calls to eigen(...) still resolve to the function, but a more specific
# name would be clearer.
eigen <- local({
  share <- eigenvalues / sum(eigenvalues)
  data.frame(
    Eigenvalue = round(eigenvalues, digits = 3),
    Proporsi_Var = round(share * 100, digits = 2),
    Kumulatif = round(cumsum(share) * 100, digits = 2)
  )
})
print(eigen)
##    Eigenvalue Proporsi_Var Kumulatif
## 1       3.478        31.62     31.62
## 2       1.614        14.67     46.29
## 3       1.255        11.41     57.70
## 4       0.973         8.85     66.55
## 5       0.820         7.46     74.01
## 6       0.734         6.68     80.68
## 7       0.601         5.46     86.14
## 8       0.548         4.99     91.13
## 9       0.452         4.11     95.23
## 10      0.323         2.93     98.17
## 11      0.202         1.83    100.00
# 2. Factor analysis without rotation
# Number of factors = number of eigenvalues greater than 1
n_factors <- sum(eigenvalues > 1)
# fm = "pa" selects principal-axis factoring in psych::fa()
fa_unrotated <- fa(
  r = df2,
  nfactors = n_factors,
  fm = "pa",
  rotate = "none",
  use = "complete.obs"
)
# Print the loadings, leaving out entries below 0.1 in absolute value
print(fa_unrotated[["loadings"]], cutoff = 0.1)
## 
## Loadings:
##                        PA1    PA2    PA3   
## Age                     0.320        -0.318
## Systolic.BP             0.628 -0.699       
## Diastolic               0.611 -0.561       
## BS                      0.775  0.161 -0.294
## Body.Temp                      0.301  0.184
## BMI                     0.575              
## Previous.Complications  0.546  0.296       
## Preexisting.Diabetes    0.578  0.285 -0.255
## Gestational.Diabetes    0.285         0.558
## Mental.Health           0.560  0.289  0.161
## Heart.Rate              0.424  0.192       
## 
##                  PA1   PA2   PA3
## SS loadings    3.009 1.225 0.656
## Proportion Var 0.274 0.111 0.060
## Cumulative Var 0.274 0.385 0.445
# Report how many factors have an eigenvalue above 1
cat(
  "\n>> Jumlah faktor dengan eigenvalue > 1:",
  n_factors,
  "\n",
  sep = " "
)
## 
## >> Jumlah faktor dengan eigenvalue > 1: 3

4b. Communality

# Communalities of the unrotated factor solution, three decimals
commun <- data.frame(
  Communality = round(fa_unrotated[["communality"]], digits = 3)
)
print(commun)
##                        Communality
## Age                          0.203
## Systolic.BP                  0.887
## Diastolic                    0.692
## BS                           0.713
## Body.Temp                    0.126
## BMI                          0.347
## Previous.Complications       0.395
## Preexisting.Diabetes         0.480
## Gestational.Diabetes         0.399
## Mental.Health                0.423
## Heart.Rate                   0.225

4c. Factor Rotation

# Same principal-axis factor model as the unrotated fit, now with varimax
# rotation.
# NOTE(review): the object name `varimax` is the same as stats::varimax();
# a more specific name would avoid confusion.
varimax <- fa(
  r = df2,
  nfactors = n_factors,
  fm = "pa",
  rotate = "varimax",
  use = "complete.obs"
)

4d. Interpretasi & Respesifikasi

# 1. Communality
# Start with the rotated loadings, leaving out entries below 0.4 in
# absolute value
print(varimax[["loadings"]], cutoff = 0.4)
## 
## Loadings:
##                        PA1    PA2    PA3   
## Age                     0.404              
## Systolic.BP                    0.923       
## Diastolic                      0.796       
## BS                      0.807              
## Body.Temp                                  
## BMI                     0.434              
## Previous.Complications  0.485              
## Preexisting.Diabetes    0.684              
## Gestational.Diabetes                  0.615
## Mental.Health           0.459         0.457
## Heart.Rate                                 
## 
##                  PA1   PA2   PA3
## SS loadings    2.128 1.674 1.089
## Proportion Var 0.193 0.152 0.099
## Cumulative Var 0.193 0.346 0.445
# Communality per variable for the varimax solution, three decimals
commun_varimax <- local({
  h2 <- varimax[["communality"]]
  data.frame(
    Variabel = names(h2),
    Communality = round(h2, digits = 3)
  )
})
print(commun_varimax)
##                                      Variabel Communality
## Age                                       Age       0.203
## Systolic.BP                       Systolic.BP       0.887
## Diastolic                           Diastolic       0.692
## BS                                         BS       0.713
## Body.Temp                           Body.Temp       0.126
## BMI                                       BMI       0.347
## Previous.Complications Previous.Complications       0.395
## Preexisting.Diabetes     Preexisting.Diabetes       0.480
## Gestational.Diabetes     Gestational.Diabetes       0.399
## Mental.Health                   Mental.Health       0.423
## Heart.Rate                         Heart.Rate       0.225
# 2. Variance
# Rotated loadings as a matrix (rows = variables, columns = factors)
load_varimax <- as.matrix(varimax[["loadings"]])
# Per factor: sum of squared loadings, its share of the ncol(df2) variables
# in percent, and the cumulative share
var_varimax <- local({
  ss <- colSums(load_varimax^2)
  share <- ss / ncol(df2)
  data.frame(
    Faktor = colnames(load_varimax),
    SS_Loadings = round(ss, digits = 3),
    Proporsi_Var = round(share * 100, digits = 2),
    Kumulatif = round(cumsum(share) * 100, digits = 2)
  )
})
print(var_varimax)
##     Faktor SS_Loadings Proporsi_Var Kumulatif
## PA1    PA1       2.128        19.34     19.34
## PA2    PA2       1.674        15.22     34.56
## PA3    PA3       1.089         9.90     44.46
# 3. Cross loading
# For each variable with more than one loading of at least 0.40 in absolute
# value, compare the two largest via the ratio of their squares and label
# the cross-loading: ratio <= 1.5 "kuat", <= 2.0 "sedang", otherwise "lemah".
for (i in seq_len(nrow(load_varimax))) {
  abs_row <- abs(load_varimax[i, ])
  hits <- abs_row[which(abs_row >= 0.40)]
  if (length(hits) <= 1) {
    next
  }

  ranked <- sort(hits, decreasing = TRUE)
  sq_ratio <- ranked[1]^2 / ranked[2]^2

  label <- if (sq_ratio > 2.0) {
    "lemah"
  } else if (sq_ratio > 1.5) {
    "sedang"
  } else {
    "kuat"
  }

  cat(sprintf(
    "%-25s | %.3f vs %.3f | Ratio: %.2f | %s\n",
    rownames(load_varimax)[i], ranked[1], ranked[2], sq_ratio, label
  ))
}
## Mental.Health             | 0.459 vs 0.457 | Ratio: 1.01 | kuat
# 4. Variables per factor
# List, for each factor, the variables whose absolute loading is >= 0.40
for (j in seq_len(ncol(load_varimax))) {
  is_member <- abs(load_varimax[, j]) >= 0.40
  cat(
    "Faktor", j, ":",
    paste(rownames(load_varimax)[is_member], collapse = ", "),
    "\n"
  )
}
## Faktor 1 : Age, BS, BMI, Previous.Complications, Preexisting.Diabetes, Mental.Health 
## Faktor 2 : Systolic.BP, Diastolic 
## Faktor 3 : Gestational.Diabetes, Mental.Health