# Memuat library yang dibutuhkan
library(readxl)
library(car)
## Loading required package: carData
library(stats)
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape2)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Memuat data dari file excel
file_path <- "/home/taufiq/Documents/freelancer/kak qardawi/project baru/data/data.xlsx"
data <- read_excel(file_path, sheet = "Sheet1")

# Mengganti tanda kurung dalam nama kolom
colnames(data) <- gsub("\\(|\\)", "_", colnames(data))
# Memisahkan data berdasarkan jenis kelamin
data_perempuan <- data %>% filter(Jenis_Kelamin == "Perempuan")
data_laki_laki <- data %>% filter(Jenis_Kelamin == "Laki-laki")

# Tentukan variabel dependen dan independen
dependent_var <- "Usia"  # Ganti dengan variabel dependen yang sesuai
# Fungsi untuk menghapus outlier
remove_outliers <- function(df, var) {
  df <- df %>% mutate_at(vars(var), as.numeric)
  Q1 <- quantile(df[[var]], 0.25, na.rm = TRUE)
  Q3 <- quantile(df[[var]], 0.75, na.rm = TRUE)
  IQR <- Q3 - Q1
  lower_bound <- Q1 - 1.5 * IQR
  upper_bound <- Q3 + 1.5 * IQR
  df <- df %>% filter(df[[var]] >= lower_bound & df[[var]] <= upper_bound)
  return(df)
}

# Menghapus outlier dari data
clean_data_perempuan <- data_perempuan
clean_data_laki_laki <- data_laki_laki

desc_vars_perempuan <- colnames(data_perempuan)[3:ncol(data_perempuan)]
desc_vars_laki_laki <- colnames(data_laki_laki)[3:ncol(data_laki_laki)]

for (var in desc_vars_perempuan) {
  clean_data_perempuan <- remove_outliers(clean_data_perempuan, var)
}
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(var)
## 
##   # Now:
##   data %>% select(all_of(var))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
for (var in desc_vars_laki_laki) {
  clean_data_laki_laki <- remove_outliers(clean_data_laki_laki, var)
}
# Memastikan semua variabel independen adalah numerik
numeric_data_perempuan <- clean_data_perempuan %>% mutate_at(vars(desc_vars_perempuan), as.numeric)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(desc_vars_perempuan)
## 
##   # Now:
##   data %>% select(all_of(desc_vars_perempuan))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
numeric_data_laki_laki <- clean_data_laki_laki %>% mutate_at(vars(desc_vars_laki_laki), as.numeric)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(desc_vars_laki_laki)
## 
##   # Now:
##   data %>% select(all_of(desc_vars_laki_laki))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Melakukan uji ANOVA dan Kruskal-Wallis
perform_tests <- function(dependent_var, independent_var, data) {
  shapiro_test <- shapiro.test(data[[dependent_var]])
  cat("\nShapiro-Wilk normality test for", dependent_var, "\n")
  print(shapiro_test)
  
  if (shapiro_test$p.value > 0.05) {
    homogeneity_test <- bartlett.test(as.formula(paste(dependent_var, "~", independent_var)), data = data)
    cat("\nBartlett test for homogeneity of variances for", dependent_var, "and", independent_var, "\n")
  } else {
    # Menambahkan pengecekan untuk menghindari error
    if (length(unique(data[[independent_var]])) > 1) {
      homogeneity_test <- fligner.test(as.formula(paste(dependent_var, "~", independent_var)), data = data)
      cat("\nFligner-Killeen test for homogeneity of variances for", dependent_var, "and", independent_var, "\n")
    } else {
      cat("\nFligner-Killeen test cannot be performed because all observations are in the same group for", independent_var, "\n")
      homogeneity_test <- list(p.value = NA)
    }
  }
  print(homogeneity_test)
  
  result <- list()
  
  if (shapiro_test$p.value > 0.05 & homogeneity_test$p.value > 0.05) {
    anova_result <- aov(as.formula(paste(dependent_var, "~", independent_var)), data = data)
    result$anova <- summary(anova_result)
    cat("\nANOVA result for", dependent_var, "and", independent_var, "\n")
    print(result$anova)
  } else {
    # Menambahkan pengecekan untuk menghindari error
    if (length(unique(data[[independent_var]])) > 1) {
      kruskal_test <- kruskal.test(as.formula(paste(dependent_var, "~", independent_var)), data = data)
      result$kruskal <- kruskal_test
      cat("\nKruskal-Wallis test result for", dependent_var, "and", independent_var, "\n")
      print(result$kruskal)
    } else {
      cat("\nKruskal-Wallis test cannot be performed because all observations are in the same group for", independent_var, "\n")
      result$kruskal <- list(p.value = NA)
    }
  }
  
  return(result)
}

results_perempuan <- list()
for (independent_var in colnames(numeric_data_perempuan[, -1])) {
  results_perempuan[[independent_var]] <- perform_tests(dependent_var, independent_var, numeric_data_perempuan)
}
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test cannot be performed because all observations are in the same group for Jenis_Kelamin 
## $p.value
## [1] NA
## 
## 
## Kruskal-Wallis test cannot be performed because all observations are in the same group for Jenis_Kelamin 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and BB1 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by BB1
## Fligner-Killeen:med chi-squared = 55.229, df = 55, p-value = 0.466
## 
## 
## Kruskal-Wallis test result for Usia and BB1 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB1
## Kruskal-Wallis chi-squared = 58.673, df = 55, p-value = 0.3424
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and BB2 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by BB2
## Fligner-Killeen:med chi-squared = 59.908, df = 56, p-value = 0.3359
## 
## 
## Kruskal-Wallis test result for Usia and BB2 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB2
## Kruskal-Wallis chi-squared = 58.781, df = 56, p-value = 0.374
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and BB3 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by BB3
## Fligner-Killeen:med chi-squared = 61.837, df = 55, p-value = 0.2452
## 
## 
## Kruskal-Wallis test result for Usia and BB3 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB3
## Kruskal-Wallis chi-squared = 59.158, df = 55, p-value = 0.3263
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MA_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MA_D_
## Fligner-Killeen:med chi-squared = 42.405, df = 57, p-value = 0.9252
## 
## 
## Kruskal-Wallis test result for Usia and MA_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MA_D_
## Kruskal-Wallis chi-squared = 59.061, df = 57, p-value = 0.4001
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MA_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MA_S_
## Fligner-Killeen:med chi-squared = 65.491, df = 54, p-value = 0.1359
## 
## 
## Kruskal-Wallis test result for Usia and MA_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MA_S_
## Kruskal-Wallis chi-squared = 58.426, df = 54, p-value = 0.3161
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and RL_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by RL_D_
## Fligner-Killeen:med chi-squared = 50.201, df = 53, p-value = 0.5838
## 
## 
## Kruskal-Wallis test result for Usia and RL_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RL_D_
## Kruskal-Wallis chi-squared = 56.283, df = 53, p-value = 0.3531
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and RL_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by RL_S_
## Fligner-Killeen:med chi-squared = 52.624, df = 52, p-value = 0.4498
## 
## 
## Kruskal-Wallis test result for Usia and RL_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RL_S_
## Kruskal-Wallis chi-squared = 56.177, df = 52, p-value = 0.3213
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and CRH_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by CRH_D_
## Fligner-Killeen:med chi-squared = 66, df = 51, p-value = 0.07708
## 
## 
## Kruskal-Wallis test result for Usia and CRH_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CRH_D_
## Kruskal-Wallis chi-squared = 61.399, df = 51, p-value = 0.151
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and CRH_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by CRH_S_
## Fligner-Killeen:med chi-squared = 45.637, df = 50, p-value = 0.649
## 
## 
## Kruskal-Wallis test result for Usia and CRH_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CRH_S_
## Kruskal-Wallis chi-squared = 57.118, df = 50, p-value = 0.2277
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and CL_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by CL_D_
## Fligner-Killeen:med chi-squared = 51.237, df = 51, p-value = 0.4644
## 
## 
## Kruskal-Wallis test result for Usia and CL_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CL_D_
## Kruskal-Wallis chi-squared = 51.742, df = 51, p-value = 0.4447
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and CL_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by CL_S_
## Fligner-Killeen:med chi-squared = 41.689, df = 52, p-value = 0.8462
## 
## 
## Kruskal-Wallis test result for Usia and CL_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CL_S_
## Kruskal-Wallis chi-squared = 57.59, df = 52, p-value = 0.2761
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and RH_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by RH_D_
## Fligner-Killeen:med chi-squared = 49.936, df = 51, p-value = 0.5159
## 
## 
## Kruskal-Wallis test result for Usia and RH_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RH_D_
## Kruskal-Wallis chi-squared = 62.379, df = 51, p-value = 0.1319
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and RH_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by RH_S_
## Fligner-Killeen:med chi-squared = 62.066, df = 54, p-value = 0.2107
## 
## 
## Kruskal-Wallis test result for Usia and RH_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RH_S_
## Kruskal-Wallis chi-squared = 53.59, df = 54, p-value = 0.4902
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and ML_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by ML_D_
## Fligner-Killeen:med chi-squared = 66, df = 52, p-value = 0.09175
## 
## 
## Kruskal-Wallis test result for Usia and ML_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by ML_D_
## Kruskal-Wallis chi-squared = 54.373, df = 52, p-value = 0.3842
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and ML_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by ML_S_
## Fligner-Killeen:med chi-squared = 51.942, df = 46, p-value = 0.2535
## 
## 
## Kruskal-Wallis test result for Usia and ML_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by ML_S_
## Kruskal-Wallis chi-squared = 60.082, df = 46, p-value = 0.0795
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MRH_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MRH_D_
## Fligner-Killeen:med chi-squared = 55.483, df = 55, p-value = 0.4564
## 
## 
## Kruskal-Wallis test result for Usia and MRH_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRH_D_
## Kruskal-Wallis chi-squared = 56.181, df = 55, p-value = 0.4304
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MRH_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MRH_S_
## Fligner-Killeen:med chi-squared = 48.331, df = 50, p-value = 0.5406
## 
## 
## Kruskal-Wallis test result for Usia and MRH_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRH_S_
## Kruskal-Wallis chi-squared = 46.985, df = 50, p-value = 0.5951
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MRB_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MRB_D_
## Fligner-Killeen:med chi-squared = 53.316, df = 47, p-value = 0.2443
## 
## 
## Kruskal-Wallis test result for Usia and MRB_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRB_D_
## Kruskal-Wallis chi-squared = 47.713, df = 47, p-value = 0.4436
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MRB_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MRB_S_
## Fligner-Killeen:med chi-squared = 36.404, df = 46, p-value = 0.8435
## 
## 
## Kruskal-Wallis test result for Usia and MRB_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRB_S_
## Kruskal-Wallis chi-squared = 45.049, df = 46, p-value = 0.512
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MrB_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MrB_D_
## Fligner-Killeen:med chi-squared = 47.313, df = 42, p-value = 0.2647
## 
## 
## Kruskal-Wallis test result for Usia and MrB_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MrB_D_
## Kruskal-Wallis chi-squared = 43.414, df = 42, p-value = 0.4109
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MrB_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MrB_S_
## Fligner-Killeen:med chi-squared = 53.494, df = 44, p-value = 0.1545
## 
## 
## Kruskal-Wallis test result for Usia and MrB_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MrB_S_
## Kruskal-Wallis chi-squared = 47.809, df = 44, p-value = 0.3208
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MBH_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MBH_D_
## Fligner-Killeen:med chi-squared = 47.246, df = 51, p-value = 0.6235
## 
## 
## Kruskal-Wallis test result for Usia and MBH_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MBH_D_
## Kruskal-Wallis chi-squared = 56.842, df = 51, p-value = 0.2666
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.88676, p-value = 1.777e-05
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MBH_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MBH_S_
## Fligner-Killeen:med chi-squared = 47.662, df = 48, p-value = 0.4866
## 
## 
## Kruskal-Wallis test result for Usia and MBH_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MBH_S_
## Kruskal-Wallis chi-squared = 56.854, df = 48, p-value = 0.1786
for (independent_var in names(results_perempuan)) {
  cat("\nHasil uji untuk variabel independen (Perempuan):", independent_var, "\n")
  print(results_perempuan[[independent_var]])
}
## 
## Hasil uji untuk variabel independen (Perempuan): Jenis_Kelamin 
## $kruskal
## $kruskal$p.value
## [1] NA
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): BB1 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB1
## Kruskal-Wallis chi-squared = 58.673, df = 55, p-value = 0.3424
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): BB2 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB2
## Kruskal-Wallis chi-squared = 58.781, df = 56, p-value = 0.374
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): BB3 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB3
## Kruskal-Wallis chi-squared = 59.158, df = 55, p-value = 0.3263
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MA_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MA_D_
## Kruskal-Wallis chi-squared = 59.061, df = 57, p-value = 0.4001
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MA_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MA_S_
## Kruskal-Wallis chi-squared = 58.426, df = 54, p-value = 0.3161
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): RL_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RL_D_
## Kruskal-Wallis chi-squared = 56.283, df = 53, p-value = 0.3531
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): RL_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RL_S_
## Kruskal-Wallis chi-squared = 56.177, df = 52, p-value = 0.3213
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): CRH_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CRH_D_
## Kruskal-Wallis chi-squared = 61.399, df = 51, p-value = 0.151
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): CRH_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CRH_S_
## Kruskal-Wallis chi-squared = 57.118, df = 50, p-value = 0.2277
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): CL_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CL_D_
## Kruskal-Wallis chi-squared = 51.742, df = 51, p-value = 0.4447
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): CL_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CL_S_
## Kruskal-Wallis chi-squared = 57.59, df = 52, p-value = 0.2761
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): RH_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RH_D_
## Kruskal-Wallis chi-squared = 62.379, df = 51, p-value = 0.1319
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): RH_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RH_S_
## Kruskal-Wallis chi-squared = 53.59, df = 54, p-value = 0.4902
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): ML_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by ML_D_
## Kruskal-Wallis chi-squared = 54.373, df = 52, p-value = 0.3842
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): ML_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by ML_S_
## Kruskal-Wallis chi-squared = 60.082, df = 46, p-value = 0.0795
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MRH_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRH_D_
## Kruskal-Wallis chi-squared = 56.181, df = 55, p-value = 0.4304
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MRH_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRH_S_
## Kruskal-Wallis chi-squared = 46.985, df = 50, p-value = 0.5951
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MRB_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRB_D_
## Kruskal-Wallis chi-squared = 47.713, df = 47, p-value = 0.4436
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MRB_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRB_S_
## Kruskal-Wallis chi-squared = 45.049, df = 46, p-value = 0.512
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MrB_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MrB_D_
## Kruskal-Wallis chi-squared = 43.414, df = 42, p-value = 0.4109
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MrB_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MrB_S_
## Kruskal-Wallis chi-squared = 47.809, df = 44, p-value = 0.3208
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MBH_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MBH_D_
## Kruskal-Wallis chi-squared = 56.842, df = 51, p-value = 0.2666
## 
## 
## 
## Hasil uji untuk variabel independen (Perempuan): MBH_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MBH_S_
## Kruskal-Wallis chi-squared = 56.854, df = 48, p-value = 0.1786
results_laki_laki <- list()
for (independent_var in colnames(numeric_data_laki_laki[, -1])) {
  results_laki_laki[[independent_var]] <- perform_tests(dependent_var, independent_var, numeric_data_laki_laki)
}
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test cannot be performed because all observations are in the same group for Jenis_Kelamin 
## $p.value
## [1] NA
## 
## 
## Kruskal-Wallis test cannot be performed because all observations are in the same group for Jenis_Kelamin 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and BB1 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by BB1
## Fligner-Killeen:med chi-squared = 89.101, df = 80, p-value = 0.2278
## 
## 
## Kruskal-Wallis test result for Usia and BB1 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB1
## Kruskal-Wallis chi-squared = 83.912, df = 80, p-value = 0.3606
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and BB2 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by BB2
## Fligner-Killeen:med chi-squared = 76.408, df = 73, p-value = 0.3697
## 
## 
## Kruskal-Wallis test result for Usia and BB2 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB2
## Kruskal-Wallis chi-squared = 84.75, df = 73, p-value = 0.1638
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and BB3 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by BB3
## Fligner-Killeen:med chi-squared = 69.929, df = 70, p-value = 0.4799
## 
## 
## Kruskal-Wallis test result for Usia and BB3 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB3
## Kruskal-Wallis chi-squared = 70.269, df = 70, p-value = 0.4685
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MA_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MA_D_
## Fligner-Killeen:med chi-squared = 84.836, df = 77, p-value = 0.2532
## 
## 
## Kruskal-Wallis test result for Usia and MA_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MA_D_
## Kruskal-Wallis chi-squared = 91.152, df = 77, p-value = 0.1292
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MA_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MA_S_
## Fligner-Killeen:med chi-squared = 79.333, df = 82, p-value = 0.5629
## 
## 
## Kruskal-Wallis test result for Usia and MA_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MA_S_
## Kruskal-Wallis chi-squared = 82.097, df = 82, p-value = 0.4762
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and RL_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by RL_D_
## Fligner-Killeen:med chi-squared = 91.3, df = 78, p-value = 0.144
## 
## 
## Kruskal-Wallis test result for Usia and RL_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RL_D_
## Kruskal-Wallis chi-squared = 81.639, df = 78, p-value = 0.3669
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and RL_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by RL_S_
## Fligner-Killeen:med chi-squared = 74.594, df = 72, p-value = 0.394
## 
## 
## Kruskal-Wallis test result for Usia and RL_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RL_S_
## Kruskal-Wallis chi-squared = 65.796, df = 72, p-value = 0.6832
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and CRH_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by CRH_D_
## Fligner-Killeen:med chi-squared = 72.147, df = 69, p-value = 0.3743
## 
## 
## Kruskal-Wallis test result for Usia and CRH_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CRH_D_
## Kruskal-Wallis chi-squared = 76.493, df = 69, p-value = 0.2507
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and CRH_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by CRH_S_
## Fligner-Killeen:med chi-squared = 71.979, df = 74, p-value = 0.5449
## 
## 
## Kruskal-Wallis test result for Usia and CRH_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CRH_S_
## Kruskal-Wallis chi-squared = 86.173, df = 74, p-value = 0.1575
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and CL_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by CL_D_
## Fligner-Killeen:med chi-squared = 80.079, df = 76, p-value = 0.3524
## 
## 
## Kruskal-Wallis test result for Usia and CL_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CL_D_
## Kruskal-Wallis chi-squared = 76.319, df = 76, p-value = 0.4682
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and CL_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by CL_S_
## Fligner-Killeen:med chi-squared = 68.992, df = 66, p-value = 0.3766
## 
## 
## Kruskal-Wallis test result for Usia and CL_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CL_S_
## Kruskal-Wallis chi-squared = 75.061, df = 66, p-value = 0.2082
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and RH_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by RH_D_
## Fligner-Killeen:med chi-squared = 85.541, df = 81, p-value = 0.3437
## 
## 
## Kruskal-Wallis test result for Usia and RH_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RH_D_
## Kruskal-Wallis chi-squared = 84.008, df = 81, p-value = 0.3876
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and RH_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by RH_S_
## Fligner-Killeen:med chi-squared = 89.846, df = 81, p-value = 0.2348
## 
## 
## Kruskal-Wallis test result for Usia and RH_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RH_S_
## Kruskal-Wallis chi-squared = 82.967, df = 81, p-value = 0.4186
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and ML_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by ML_D_
## Fligner-Killeen:med chi-squared = 69.161, df = 71, p-value = 0.5397
## 
## 
## Kruskal-Wallis test result for Usia and ML_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by ML_D_
## Kruskal-Wallis chi-squared = 73.871, df = 71, p-value = 0.3845
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and ML_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by ML_S_
## Fligner-Killeen:med chi-squared = 74.519, df = 72, p-value = 0.3963
## 
## 
## Kruskal-Wallis test result for Usia and ML_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by ML_S_
## Kruskal-Wallis chi-squared = 72.455, df = 72, p-value = 0.4628
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MRH_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MRH_D_
## Fligner-Killeen:med chi-squared = 96.085, df = 77, p-value = 0.06954
## 
## 
## Kruskal-Wallis test result for Usia and MRH_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRH_D_
## Kruskal-Wallis chi-squared = 85.441, df = 77, p-value = 0.2388
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MRH_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MRH_S_
## Fligner-Killeen:med chi-squared = 85.516, df = 76, p-value = 0.2132
## 
## 
## Kruskal-Wallis test result for Usia and MRH_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRH_S_
## Kruskal-Wallis chi-squared = 86.374, df = 76, p-value = 0.195
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MRB_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MRB_D_
## Fligner-Killeen:med chi-squared = 80.16, df = 64, p-value = 0.08363
## 
## 
## Kruskal-Wallis test result for Usia and MRB_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRB_D_
## Kruskal-Wallis chi-squared = 64.37, df = 64, p-value = 0.4635
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MRB_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MRB_S_
## Fligner-Killeen:med chi-squared = 64.387, df = 67, p-value = 0.5678
## 
## 
## Kruskal-Wallis test result for Usia and MRB_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRB_S_
## Kruskal-Wallis chi-squared = 76.104, df = 67, p-value = 0.2088
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MrB_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MrB_D_
## Fligner-Killeen:med chi-squared = 71.689, df = 66, p-value = 0.2949
## 
## 
## Kruskal-Wallis test result for Usia and MrB_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MrB_D_
## Kruskal-Wallis chi-squared = 67.843, df = 66, p-value = 0.4142
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MrB_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MrB_S_
## Fligner-Killeen:med chi-squared = 75.207, df = 61, p-value = 0.1043
## 
## 
## Kruskal-Wallis test result for Usia and MrB_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MrB_S_
## Kruskal-Wallis chi-squared = 76.103, df = 61, p-value = 0.09214
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MBH_D_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MBH_D_
## Fligner-Killeen:med chi-squared = 72.04, df = 56, p-value = 0.07314
## 
## 
## Kruskal-Wallis test result for Usia and MBH_D_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MBH_D_
## Kruskal-Wallis chi-squared = 78.762, df = 56, p-value = 0.02415
## 
## 
## Shapiro-Wilk normality test for Usia 
## 
##  Shapiro-Wilk normality test
## 
## data:  data[[dependent_var]]
## W = 0.94666, p-value = 0.0005412
## 
## 
## Fligner-Killeen test for homogeneity of variances for Usia and MBH_S_ 
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Usia by MBH_S_
## Fligner-Killeen:med chi-squared = 58.577, df = 59, p-value = 0.4911
## 
## 
## Kruskal-Wallis test result for Usia and MBH_S_ 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MBH_S_
## Kruskal-Wallis chi-squared = 86.676, df = 59, p-value = 0.01097
for (independent_var in names(results_laki_laki)) {
  cat("\nHasil uji untuk variabel independen (Laki-laki):", independent_var, "\n")
  print(results_laki_laki[[independent_var]])
}
## 
## Hasil uji untuk variabel independen (Laki-laki): Jenis_Kelamin 
## $kruskal
## $kruskal$p.value
## [1] NA
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): BB1 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB1
## Kruskal-Wallis chi-squared = 83.912, df = 80, p-value = 0.3606
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): BB2 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB2
## Kruskal-Wallis chi-squared = 84.75, df = 73, p-value = 0.1638
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): BB3 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by BB3
## Kruskal-Wallis chi-squared = 70.269, df = 70, p-value = 0.4685
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MA_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MA_D_
## Kruskal-Wallis chi-squared = 91.152, df = 77, p-value = 0.1292
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MA_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MA_S_
## Kruskal-Wallis chi-squared = 82.097, df = 82, p-value = 0.4762
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): RL_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RL_D_
## Kruskal-Wallis chi-squared = 81.639, df = 78, p-value = 0.3669
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): RL_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RL_S_
## Kruskal-Wallis chi-squared = 65.796, df = 72, p-value = 0.6832
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): CRH_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CRH_D_
## Kruskal-Wallis chi-squared = 76.493, df = 69, p-value = 0.2507
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): CRH_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CRH_S_
## Kruskal-Wallis chi-squared = 86.173, df = 74, p-value = 0.1575
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): CL_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CL_D_
## Kruskal-Wallis chi-squared = 76.319, df = 76, p-value = 0.4682
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): CL_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by CL_S_
## Kruskal-Wallis chi-squared = 75.061, df = 66, p-value = 0.2082
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): RH_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RH_D_
## Kruskal-Wallis chi-squared = 84.008, df = 81, p-value = 0.3876
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): RH_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by RH_S_
## Kruskal-Wallis chi-squared = 82.967, df = 81, p-value = 0.4186
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): ML_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by ML_D_
## Kruskal-Wallis chi-squared = 73.871, df = 71, p-value = 0.3845
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): ML_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by ML_S_
## Kruskal-Wallis chi-squared = 72.455, df = 72, p-value = 0.4628
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MRH_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRH_D_
## Kruskal-Wallis chi-squared = 85.441, df = 77, p-value = 0.2388
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MRH_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRH_S_
## Kruskal-Wallis chi-squared = 86.374, df = 76, p-value = 0.195
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MRB_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRB_D_
## Kruskal-Wallis chi-squared = 64.37, df = 64, p-value = 0.4635
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MRB_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MRB_S_
## Kruskal-Wallis chi-squared = 76.104, df = 67, p-value = 0.2088
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MrB_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MrB_D_
## Kruskal-Wallis chi-squared = 67.843, df = 66, p-value = 0.4142
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MrB_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MrB_S_
## Kruskal-Wallis chi-squared = 76.103, df = 61, p-value = 0.09214
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MBH_D_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MBH_D_
## Kruskal-Wallis chi-squared = 78.762, df = 56, p-value = 0.02415
## 
## 
## 
## Hasil uji untuk variabel independen (Laki-laki): MBH_S_ 
## $kruskal
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Usia by MBH_S_
## Kruskal-Wallis chi-squared = 86.676, df = 59, p-value = 0.01097
# Uji multikolinearitas sebelum DFA
multicollinearity_test <- function(data, dependent_var, independent_vars) {
  formula <- as.formula(paste(dependent_var, "~", paste(independent_vars, collapse = "+")))
  model <- lm(formula, data = data)
  vif_values <- vif(model)
  return(vif_values)
}

vif_results_perempuan_before <- multicollinearity_test(clean_data_perempuan, dependent_var, desc_vars_perempuan)
cat("\nUji Multikolinearitas Sebelum DFA (Perempuan):\n")
## 
## Uji Multikolinearitas Sebelum DFA (Perempuan):
print(vif_results_perempuan_before)
##       BB1       BB2       BB3     MA_D_     MA_S_     RL_D_     RL_S_    CRH_D_ 
##  2.749794  1.757259  2.248114 12.150471  5.363303 27.939698 24.880387 34.856994 
##    CRH_S_     CL_D_     CL_S_     RH_D_     RH_S_     ML_D_     ML_S_    MRH_D_ 
## 19.409808 33.772597 23.405646 24.347881 13.567499  3.913501  3.602341 23.577601 
##    MRH_S_    MRB_D_    MRB_S_    MrB_D_    MrB_S_    MBH_D_    MBH_S_ 
## 23.310823 12.968071 13.448892  5.367834  3.734209  8.219257  8.392261
vif_results_laki_laki_before <- multicollinearity_test(clean_data_laki_laki, dependent_var, desc_vars_laki_laki)
cat("\nUji Multikolinearitas Sebelum DFA (Laki-laki):\n")
## 
## Uji Multikolinearitas Sebelum DFA (Laki-laki):
print(vif_results_laki_laki_before)
##       BB1       BB2       BB3     MA_D_     MA_S_     RL_D_     RL_S_    CRH_D_ 
##  2.323296  2.473788  1.955812 11.429444 11.888775 22.644991 17.317910 31.257623 
##    CRH_S_     CL_D_     CL_S_     RH_D_     RH_S_     ML_D_     ML_S_    MRH_D_ 
## 15.935203 20.031286 12.216042 29.563146 35.014505  7.400993  7.474817 21.297098 
##    MRH_S_    MRB_D_    MRB_S_    MrB_D_    MrB_S_    MBH_D_    MBH_S_ 
## 21.573966 19.586698 19.785924 18.620493 15.990789  7.105134  7.704028
# Melakukan DFA
d_data_perempuan <- clean_data_perempuan[, c(dependent_var, desc_vars_perempuan)]
d_data_perempuan[[dependent_var]] <- as.factor(d_data_perempuan[[dependent_var]])

d_data_laki_laki <- clean_data_laki_laki[, c(dependent_var, desc_vars_laki_laki)]
d_data_laki_laki[[dependent_var]] <- as.factor(d_data_laki_laki[[dependent_var]])

# DFA untuk Perempuan
discriminant_result_perempuan <- lda(as.formula(paste(dependent_var, "~", paste(desc_vars_perempuan, collapse = "+"))), data = d_data_perempuan)
print(discriminant_result_perempuan)
## Call:
## lda(as.formula(paste(dependent_var, "~", paste(desc_vars_perempuan, 
##     collapse = "+"))), data = d_data_perempuan)
## 
## Prior probabilities of groups:
##          2          3          4          5          6          7          8 
## 0.16417910 0.13432836 0.04477612 0.13432836 0.07462687 0.10447761 0.17910448 
##          9 
## 0.16417910 
## 
## Group means:
##        BB1      BB2      BB3    MA_D_    MA_S_    RL_D_    RL_S_   CRH_D_
## 2 115.5273 93.20909 92.22727 128.2909 128.9818 50.57273 50.90909 55.04545
## 3 116.4111 94.85556 93.61111 127.4133 128.2333 52.53333 52.18889 56.92222
## 4 112.6333 94.63333 91.90000 124.6333 125.2667 52.76667 52.30000 55.36667
## 5 121.1111 96.56667 92.51111 122.6000 124.3111 55.23333 53.95556 60.11111
## 6 118.8800 98.56000 97.94000 127.1800 121.3200 55.64000 55.80000 60.06000
## 7 119.9857 95.14286 93.65714 127.4286 126.1714 51.31429 50.64286 57.17143
## 8 121.2550 89.42500 97.75000 125.5083 126.0250 55.75833 55.09167 59.25833
## 9 122.9909 97.07273 94.40909 126.7000 125.5818 55.95455 56.10000 59.98182
##     CRH_S_    CL_D_    CL_S_    RH_D_    RH_S_    ML_D_    ML_S_   MRH_D_
## 2 54.80000 53.74545 53.64545 42.99091 43.68182 69.49091 70.17273 54.21818
## 3 55.85556 55.30000 54.22222 46.53333 46.41111 73.65556 74.32222 56.02222
## 4 55.93333 53.60000 53.93333 46.76667 47.50000 69.36667 71.36667 55.13333
## 5 58.75556 58.27778 56.87778 51.60000 49.94444 74.30000 74.28889 59.28889
## 6 61.00000 58.46000 58.46000 46.02000 45.32000 74.26000 74.76000 58.56000
## 7 57.52857 55.32857 55.32857 46.45714 46.37143 72.95714 72.67143 54.40000
## 8 59.16667 57.07500 57.34167 51.69167 51.17500 73.65000 74.37500 56.64167
## 9 60.24545 57.56364 58.07273 50.25455 50.55455 73.44545 72.25455 55.26364
##     MRH_S_   MRB_D_   MRB_S_   MrB_D_   MrB_S_   MBH_D_   MBH_S_
## 2 54.13636 40.03636 40.41818 30.70000 31.10000 25.74545 25.60909
## 3 55.06667 40.15556 39.76667 31.10000 31.11111 27.44444 27.65556
## 4 54.80000 41.16667 41.03333 28.90000 28.46667 25.13333 25.36667
## 5 57.91111 39.84444 40.60000 31.13333 31.91111 28.61111 28.56667
## 6 59.06000 42.52000 42.62000 31.82000 31.84000 30.42000 30.78000
## 7 53.74286 40.40000 40.21429 29.55714 30.72857 28.31429 27.67143
## 8 56.00000 43.87500 43.99583 32.53333 32.60000 31.70000 31.84167
## 9 55.69091 42.72727 43.17273 32.54545 31.99091 27.86364 27.61818
## 
## Coefficients of linear discriminants:
##                 LD1         LD2          LD3         LD4         LD5
## BB1     0.048252958 -0.11033251  0.073370220  0.05467813  0.15947232
## BB2    -0.271130896 -0.13431390 -0.001012248  0.15565472 -0.16567832
## BB3     0.286907547  0.02714287 -0.079499145 -0.08807346 -0.02803783
## MA_D_  -0.099666939 -0.15486544  0.009235343  0.33073345 -0.05422712
## MA_S_   0.054873048  0.34824064  0.194810682 -0.12765451 -0.11120559
## RL_D_  -0.321125922 -0.30096853  0.009280327  0.10181007  0.14536346
## RL_S_   0.059307797  0.10217863  0.084535298 -0.46133620 -0.22671486
## CRH_D_  0.005392424 -0.04310700 -0.009082225  0.11169567 -0.41554021
## CRH_S_  0.372948983 -0.29256290 -0.148309763 -0.19510206  0.35886228
## CL_D_  -0.185042862 -0.17053657  0.060464734 -0.04233007  0.39939499
## CL_S_  -0.193485505  0.30521120  0.242231536  0.08801486 -0.36065395
## RH_D_   0.403732235 -0.06749937 -0.152337300  0.29716064 -0.24146278
## RH_S_  -0.040411425  0.28856806  0.323977936  0.15597104  0.03222985
## ML_D_  -0.070709618  0.05859510  0.162245998  0.07466444  0.09693937
## ML_S_   0.071566635  0.16433502 -0.372364701  0.04098416 -0.30705118
## MRH_D_ -0.110511142  0.44265646 -0.060711406  0.21619499  0.13634172
## MRH_S_ -0.085230677 -0.33730358 -0.188606306 -0.18752253 -0.02689880
## MRB_D_  0.634270333  0.04814806 -0.553044626 -0.08325347 -0.20803415
## MRB_S_ -0.563492201 -0.17963313  0.571971313  0.11809264  0.35632069
## MrB_D_ -0.552120915  0.14946326  0.477152019 -0.56076010 -0.38228459
## MrB_S_  0.288346192 -0.13613503 -0.163731347  0.34447933  0.32315814
## MBH_D_  0.117084489  0.11143967  0.150485813  0.01359475  0.36295112
## MBH_S_  0.412720065 -0.22637016 -0.239399039 -0.10024113 -0.35604378
##                 LD6          LD7
## BB1     0.071043193 -0.044876509
## BB2    -0.103718051 -0.030357895
## BB3    -0.132531253 -0.042000085
## MA_D_  -0.044326540 -0.006469175
## MA_S_   0.057135240 -0.076124703
## RL_D_   0.373625287 -0.011028127
## RL_S_  -0.276730286 -0.010655602
## CRH_D_ -0.040429175  0.037397844
## CRH_S_ -0.255235929  0.017018270
## CL_D_   0.266989025 -0.017504612
## CL_S_   0.051243428 -0.070210534
## RH_D_   0.003843719  0.211404286
## RH_S_  -0.219198077 -0.108379197
## ML_D_   0.094559401 -0.188595877
## ML_S_  -0.067921670  0.131637319
## MRH_D_ -0.242096786  0.035568608
## MRH_S_  0.378910531  0.054368892
## MRB_D_ -0.370595527 -0.300983658
## MRB_S_  0.252627565  0.589545197
## MrB_D_  0.298825681 -0.216619132
## MrB_S_  0.034336986 -0.001410612
## MBH_D_ -0.096342035 -0.269174726
## MBH_S_  0.179217109  0.084105544
## 
## Proportion of trace:
##    LD1    LD2    LD3    LD4    LD5    LD6    LD7 
## 0.5127 0.1855 0.1387 0.0805 0.0356 0.0281 0.0189
d_scores_perempuan <- predict(discriminant_result_perempuan)$x
d_scores_perempuan <- as.data.frame(d_scores_perempuan)
colnames(d_scores_perempuan) <- paste0("LD", 1:ncol(d_scores_perempuan))

contributions_perempuan <- discriminant_result_perempuan$scaling
top_contributors_perempuan <- apply(contributions_perempuan, 2, function(x) desc_vars_perempuan[which.max(abs(x))])

contributions_df_perempuan <- data.frame(Discriminant = colnames(contributions_perempuan), Top_Contributor = top_contributors_perempuan)
print(contributions_df_perempuan)
##     Discriminant Top_Contributor
## LD1          LD1          MRB_D_
## LD2          LD2          MRH_D_
## LD3          LD3          MRB_S_
## LD4          LD4          MrB_D_
## LD5          LD5          CRH_D_
## LD6          LD6          MRH_S_
## LD7          LD7          MRB_S_
# DFA untuk Laki-laki
discriminant_result_laki_laki <- lda(as.formula(paste(dependent_var, "~", paste(desc_vars_laki_laki, collapse = "+"))), data = d_data_laki_laki)
print(discriminant_result_laki_laki)
## Call:
## lda(as.formula(paste(dependent_var, "~", paste(desc_vars_laki_laki, 
##     collapse = "+"))), data = d_data_laki_laki)
## 
## Prior probabilities of groups:
##          1          2          3          4          5          6          7 
## 0.02020202 0.11111111 0.14141414 0.11111111 0.17171717 0.13131313 0.10101010 
##          8          9 
## 0.12121212 0.09090909 
## 
## Group means:
##        BB1       BB2       BB3    MA_D_    MA_S_    RL_D_    RL_S_   CRH_D_
## 1 123.0000  96.85000  97.55000 126.3000 127.5500 53.50000 53.25000 56.75000
## 2 119.4182  97.64545  96.98182 127.2091 127.4545 53.33636 53.34545 59.59091
## 3 117.7429  93.95000  96.59286 123.8071 123.6071 58.21429 58.11429 62.22143
## 4 122.9091  99.10909  97.40909 127.5818 129.0818 58.53636 56.43636 63.30000
## 5 125.3118  97.65882 100.45882 125.6059 125.5765 59.88824 59.81176 62.62353
## 6 125.2615 100.06154 101.50000 128.2846 128.0462 58.95385 58.72308 65.54615
## 7 125.4100  99.06000 102.08000 123.1900 124.9600 60.03000 61.21000 65.77000
## 8 127.4050 101.05000 106.05000 126.3417 126.2000 62.00000 62.40000 63.55833
## 9 122.4667 100.01111 102.56667 127.6444 128.5333 62.01111 62.68889 65.78889
##     CRH_S_    CL_D_    CL_S_    RH_D_    RH_S_    ML_D_    ML_S_   MRH_D_
## 1 57.60000 56.90000 55.20000 47.85000 47.80000 75.50000 76.40000 57.75000
## 2 59.37273 57.90909 57.35455 46.58182 46.94545 73.48182 73.85455 58.84545
## 3 61.75000 60.16429 59.95000 53.47857 53.22143 76.57143 76.97857 61.20714
## 4 62.01818 61.50909 60.51818 49.15455 47.75455 74.37273 73.98182 62.84545
## 5 62.35294 60.77059 59.78824 54.97647 55.17647 76.01176 75.87647 62.26471
## 6 64.31538 62.88462 62.85385 50.76154 50.63846 75.41538 75.20000 64.87692
## 7 65.23000 65.18000 64.10000 60.16000 59.99000 79.74000 79.78000 64.11000
## 8 63.30833 63.55833 63.30833 56.75833 57.70000 77.11667 76.62500 65.33333
## 9 65.44444 63.60000 63.31111 54.84444 55.46667 77.36667 77.72222 67.15556
##     MRH_S_   MRB_D_   MRB_S_   MrB_D_   MrB_S_   MBH_D_   MBH_S_
## 1 57.10000 42.35000 43.45000 33.80000 34.60000 27.30000 27.90000
## 2 58.38182 41.79091 42.13636 33.48182 33.88182 27.12727 27.02727
## 3 61.13571 44.50714 44.33571 34.24286 34.53571 27.60000 27.75000
## 4 61.68182 41.99091 41.54545 31.13636 31.10909 30.40909 29.49091
## 5 62.30588 43.03529 43.31765 32.63529 33.16471 30.76471 30.60588
## 6 63.82308 42.52308 42.67692 31.26923 31.42308 30.76923 30.38462
## 7 64.32000 48.08000 48.55000 38.25000 38.42000 32.58000 32.78000
## 8 65.63333 45.60833 45.83333 33.47500 33.91667 32.94167 33.77500
## 9 66.54444 44.65556 44.81111 32.57778 32.72222 33.67778 33.33333
## 
## Coefficients of linear discriminants:
##                 LD1           LD2           LD3          LD4          LD5
## BB1     0.005186536 -0.0031832549 -0.0228839580 -0.133115226  0.141994196
## BB2     0.020699007 -0.0322700462  0.0986382081  0.135963281 -0.023455822
## BB3     0.016716963  0.0107980147  0.0701985011 -0.052406052 -0.041582293
## MA_D_   0.033452281  0.0786680691 -0.1017532843 -0.129600357  0.043747372
## MA_S_   0.067629532  0.0000331198  0.0085678276  0.145514969  0.024424865
## RL_D_  -0.124105113 -0.2314066819 -0.1777208579 -0.246742818  0.027321812
## RL_S_   0.234011383  0.2176153718 -0.0008007132  0.193847041 -0.101283627
## CRH_D_ -0.199387011 -0.3325527030 -0.6195552669  0.599224440  0.247501962
## CRH_S_ -0.111537252  0.2862145787 -0.2258982308 -0.364980779 -0.079672295
## CL_D_   0.420357760  0.2839171972  0.4774752369 -0.401836174 -0.099802213
## CL_S_  -0.007118510 -0.3155016917  0.1486305256  0.264619285  0.073431827
## RH_D_   0.131718959 -0.0192488322  0.0135788873  0.220166621  0.012452165
## RH_S_   0.076279516  0.1266664929 -0.1386262671 -0.238830265  0.100283918
## ML_D_   0.030946132 -0.2029346426  0.0036169232 -0.013455837  0.007117049
## ML_S_  -0.064116501  0.1001185628 -0.0889756824 -0.081697785 -0.091038560
## MRH_D_ -0.301329081  0.0275250229  0.2817989587  0.002374947 -0.105075410
## MRH_S_  0.033160991 -0.0768537050  0.0516450539 -0.066110013 -0.070350089
## MRB_D_ -0.266428701  0.0159274984  0.2137871485 -0.083236134 -0.383527280
## MRB_S_  0.219019953 -0.1126207447 -0.2383202828  0.056580959  0.109023030
## MrB_D_ -0.128384798 -0.0454840680  0.1074101093  0.402661032  0.292851330
## MrB_S_  0.139916593  0.5067392402  0.0072343114 -0.103070389 -0.083920544
## MBH_D_ -0.015139321 -0.2034062609 -0.0727274125  0.254772732  0.150310758
## MBH_S_  0.513922858 -0.0046228101  0.0685759780 -0.012306938 -0.087291897
##                  LD6          LD7          LD8
## BB1    -1.960548e-02  0.008783152 -0.092913954
## BB2     5.565241e-02  0.056291497  0.021799648
## BB3     7.792573e-03  0.056339728  0.080318064
## MA_D_  -9.774693e-02  0.205212442  0.043836236
## MA_S_   1.033137e-01 -0.187680665 -0.052550785
## RL_D_  -3.131764e-01 -0.212218682  0.078347378
## RL_S_   2.961460e-01  0.179297399 -0.054332279
## CRH_D_ -1.297294e-01  0.477233243  0.357555419
## CRH_S_  3.554082e-01 -0.298212229 -0.277881524
## CL_D_  -5.016582e-03 -0.311983040 -0.198081197
## CL_S_  -3.648274e-01  0.182774571  0.003392232
## RH_D_  -1.276387e-01  0.040741681 -0.059800780
## RH_S_   1.227557e-01 -0.007134815  0.087549312
## ML_D_  -1.645076e-01 -0.030114624  0.009322595
## ML_S_   1.424095e-01 -0.013084180 -0.203052423
## MRH_D_  4.079045e-01  0.066841447 -0.231503002
## MRH_S_ -3.010026e-01 -0.127675937  0.239277513
## MRB_D_ -5.343449e-02 -0.072333044  0.191850508
## MRB_S_  2.513646e-03  0.103138860 -0.361404751
## MrB_D_ -3.015825e-02  0.076021936  0.092924383
## MrB_S_  3.935252e-02 -0.059701039  0.193809823
## MBH_D_ -8.734776e-05 -0.276910125  0.328253354
## MBH_S_  7.128472e-02  0.165161718 -0.152979505
## 
## Proportion of trace:
##    LD1    LD2    LD3    LD4    LD5    LD6    LD7    LD8 
## 0.5595 0.2117 0.0773 0.0696 0.0309 0.0261 0.0181 0.0068
d_scores_laki_laki <- predict(discriminant_result_laki_laki)$x
d_scores_laki_laki <- as.data.frame(d_scores_laki_laki)
colnames(d_scores_laki_laki) <- paste0("LD", 1:ncol(d_scores_laki_laki))

contributions_laki_laki <- discriminant_result_laki_laki$scaling
top_contributors_laki_laki <- apply(contributions_laki_laki, 2, function(x) desc_vars_laki_laki[which.max(abs(x))])

contributions_df_laki_laki <- data.frame(Discriminant = colnames(contributions_laki_laki), Top_Contributor = top_contributors_laki_laki)
print(contributions_df_laki_laki)
##     Discriminant Top_Contributor
## LD1          LD1          MBH_S_
## LD2          LD2          MrB_S_
## LD3          LD3          CRH_D_
## LD4          LD4          CRH_D_
## LD5          LD5          MRB_D_
## LD6          LD6          MRH_D_
## LD7          LD7          CRH_D_
## LD8          LD8          MRB_S_
# Uji multikolinearitas setelah DFA
vif_results_perempuan <- multicollinearity_test(cbind(clean_data_perempuan[dependent_var], d_scores_perempuan), dependent_var, colnames(d_scores_perempuan))
cat("\nUji Multikolinearitas Setelah DFA (Perempuan):\n")
## 
## Uji Multikolinearitas Setelah DFA (Perempuan):
print(vif_results_perempuan)
## LD1 LD2 LD3 LD4 LD5 LD6 LD7 
##   1   1   1   1   1   1   1
vif_results_laki_laki <- multicollinearity_test(cbind(clean_data_laki_laki[dependent_var], d_scores_laki_laki), dependent_var, colnames(d_scores_laki_laki))
cat("\nUji Multikolinearitas Setelah DFA (Laki-laki):\n")
## 
## Uji Multikolinearitas Setelah DFA (Laki-laki):
print(vif_results_laki_laki)
## LD1 LD2 LD3 LD4 LD5 LD6 LD7 LD8 
##   1   1   1   1   1   1   1   1
# Membuat boxplot data asli dan bersih untuk Perempuan
melted_data_perempuan_original <- melt(data_perempuan[, desc_vars_perempuan])
## Using MrB_D_ as id variables
melted_data_perempuan_clean <- melt(clean_data_perempuan[, desc_vars_perempuan])
## No id variables; using all as measure variables
boxplot_plot_perempuan_original <- ggplot(melted_data_perempuan_original, aes(x = variable, y = value)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Boxplot untuk Variabel Independen (Perempuan - Data Asli)", x = "Variabel", y = "Nilai")

boxplot_plot_perempuan_clean <- ggplot(melted_data_perempuan_clean, aes(x = variable, y = value)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Boxplot untuk Variabel Independen (Perempuan - Data Bersih)", x = "Variabel", y = "Nilai")

print(boxplot_plot_perempuan_original)

print(boxplot_plot_perempuan_clean)

# Membuat boxplot data asli dan bersih untuk Laki-laki
melted_data_laki_laki_original <- melt(data_laki_laki[, desc_vars_laki_laki])
## Using MrB_D_ as id variables
melted_data_laki_laki_clean <- melt(clean_data_laki_laki[, desc_vars_laki_laki])
## No id variables; using all as measure variables
boxplot_plot_laki_laki_original <- ggplot(melted_data_laki_laki_original, aes(x = variable, y = value)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Boxplot untuk Variabel Independen (Laki-laki - Data Asli)", x = "Variabel", y = "Nilai")

boxplot_plot_laki_laki_clean <- ggplot(melted_data_laki_laki_clean, aes(x = variable, y = value)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Boxplot untuk Variabel Independen (Laki-laki - Data Bersih)", x = "Variabel", y = "Nilai")

print(boxplot_plot_laki_laki_original)

print(boxplot_plot_laki_laki_clean)

# Menghitung statistik deskriptif untuk semua variabel DFA
desc_stats_perempuan <- describe(d_scores_perempuan)
cat("\nStatistika Deskriptif (Perempuan):\n")
## 
## Statistika Deskriptif (Perempuan):
print(desc_stats_perempuan)
##     vars  n mean   sd median trimmed  mad   min  max range  skew kurtosis   se
## LD1    1 67    0 2.71  -0.62   -0.26 1.85 -3.98 6.45 10.43  0.96    -0.09 0.33
## LD2    2 67    0 1.80   0.09    0.09 1.58 -4.74 4.75  9.50 -0.43     0.24 0.22
## LD3    3 67    0 1.63   0.03   -0.01 1.64 -4.17 3.15  7.32  0.06    -0.42 0.20
## LD4    4 67    0 1.38  -0.29    0.03 1.42 -3.42 2.55  5.97 -0.12    -0.53 0.17
## LD5    5 67    0 1.16  -0.07   -0.02 0.89 -2.41 2.98  5.40  0.22    -0.09 0.14
## LD6    6 67    0 1.12  -0.02   -0.02 0.96 -3.73 2.46  6.19 -0.06     0.99 0.14
## LD7    7 67    0 1.06   0.00   -0.01 0.86 -2.91 2.93  5.84  0.02     0.54 0.13
desc_stats_laki_laki <- describe(d_scores_laki_laki)
cat("\nStatistika Deskriptif (Laki-laki):\n")
## 
## Statistika Deskriptif (Laki-laki):
print(desc_stats_laki_laki)
##     vars  n mean   sd median trimmed  mad   min  max range  skew kurtosis   se
## LD1    1 99    0 2.26  -0.39   -0.02 2.57 -4.64 5.40 10.04  0.10    -0.70 0.23
## LD2    2 99    0 1.58   0.06    0.03 1.84 -3.25 2.88  6.13 -0.13    -1.06 0.16
## LD3    3 99    0 1.22   0.08    0.03 1.24 -3.20 3.04  6.24 -0.19    -0.37 0.12
## LD4    4 99    0 1.20  -0.04    0.01 1.07 -2.78 2.74  5.52 -0.07    -0.32 0.12
## LD5    5 99    0 1.07  -0.01   -0.03 1.17 -2.42 2.61  5.03  0.16    -0.48 0.11
## LD6    6 99    0 1.05  -0.10   -0.04 0.91 -2.03 4.37  6.40  0.72     1.73 0.11
## LD7    7 99    0 1.03  -0.02   -0.01 0.83 -2.20 3.88  6.08  0.45     1.72 0.10
## LD8    8 99    0 0.98   0.13    0.03 1.09 -2.28 2.31  4.59 -0.26    -0.67 0.10
# Mengubah variabel dependen menjadi faktor
clean_data_perempuan[[dependent_var]] <- as.factor(clean_data_perempuan[[dependent_var]])
clean_data_laki_laki[[dependent_var]] <- as.factor(clean_data_laki_laki[[dependent_var]])

# Analisis Regresi Logistik Ordinal untuk Perempuan
formula_str_perempuan <- paste(dependent_var, "~", paste(colnames(d_scores_perempuan), collapse = " + "))
data_model_perempuan <- cbind(clean_data_perempuan[dependent_var], d_scores_perempuan)
colnames(data_model_perempuan)[1] <- dependent_var  # Pastikan nama kolom dependen sesuai
logistic_model_perempuan <- polr(as.formula(formula_str_perempuan), data = data_model_perempuan, Hess = TRUE)
summary(logistic_model_perempuan)
## Call:
## polr(formula = as.formula(formula_str_perempuan), data = data_model_perempuan, 
##     Hess = TRUE)
## 
## Coefficients:
##        Value Std. Error  t value
## LD1  0.61352     0.1123  5.46100
## LD2 -0.93021     0.1671 -5.56733
## LD3  0.78669     0.1802  4.36585
## LD4  0.45078     0.1891  2.38323
## LD5 -0.45263     0.2292 -1.97478
## LD6 -0.42589     0.1955 -2.17816
## LD7 -0.01837     0.2143 -0.08571
## 
## Intercepts:
##     Value   Std. Error t value
## 2|3 -3.4781  0.5564    -6.2511
## 3|4 -1.9384  0.4534    -4.2748
## 4|5 -1.4357  0.4284    -3.3514
## 5|6 -0.0058  0.3955    -0.0147
## 6|7  0.7173  0.4069     1.7629
## 7|8  1.7511  0.4425     3.9574
## 8|9  3.5216  0.5440     6.4733
## 
## Residual Deviance: 191.3588 
## AIC: 219.3588
# Uji Likelihood Ratio untuk godness of fit (Perempuan)
null_model_perempuan <- polr(as.formula(paste(dependent_var, "~ 1")), data = cbind(clean_data_perempuan[dependent_var], d_scores_perempuan), Hess = TRUE)
lr_test_perempuan <- anova(null_model_perempuan, logistic_model_perempuan)
cat("\nUji Likelihood Ratio (Perempuan):\n")
## 
## Uji Likelihood Ratio (Perempuan):
print(lr_test_perempuan)
## Likelihood ratio tests of ordinal regression models
## 
## Response: Usia
##                                     Model Resid. df Resid. Dev   Test    Df
## 1                                       1        60   269.2548             
## 2 LD1 + LD2 + LD3 + LD4 + LD5 + LD6 + LD7        53   191.3588 1 vs 2     7
##   LR stat.      Pr(Chi)
## 1                      
## 2 77.89596 3.697043e-14
# Uji signifikan parsial (wald) untuk Perempuan
coeftest_result_perempuan <- coeftest(logistic_model_perempuan)
cat("\nUji Signifikansi Parsial (Wald) (Perempuan):\n")
## 
## Uji Signifikansi Parsial (Wald) (Perempuan):
print(coeftest_result_perempuan)
## 
## t test of coefficients:
## 
##      Estimate Std. Error t value  Pr(>|t|)    
## LD1  0.613523   0.112346  5.4610 1.285e-06 ***
## LD2 -0.930213   0.167084 -5.5673 8.754e-07 ***
## LD3  0.786687   0.180191  4.3659 5.903e-05 ***
## LD4  0.450777   0.189146  2.3832   0.02078 *  
## LD5 -0.452628   0.229204 -1.9748   0.05351 .  
## LD6 -0.425894   0.195530 -2.1782   0.03386 *  
## LD7 -0.018366   0.214288 -0.0857   0.93202    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Odds ratio untuk Perempuan
odds_ratios_perempuan <- exp(coef(logistic_model_perempuan))
cat("\nOdds Ratios (Perempuan):\n")
## 
## Odds Ratios (Perempuan):
print(odds_ratios_perempuan)
##       LD1       LD2       LD3       LD4       LD5       LD6       LD7 
## 1.8469260 0.3944698 2.1961080 1.5695315 0.6359548 0.6531854 0.9818020
# Analisis Regresi Logistik Ordinal untuk Laki-laki
formula_str_laki_laki <- paste(dependent_var, "~", paste(colnames(d_scores_laki_laki), collapse = " + "))
data_model_laki_laki <- cbind(clean_data_laki_laki[dependent_var], d_scores_laki_laki)
colnames(data_model_laki_laki)[1] <- dependent_var  # Pastikan nama kolom dependen sesuai
logistic_model_laki_laki <- polr(as.formula(formula_str_laki_laki), data = data_model_laki_laki, Hess = TRUE)
summary(logistic_model_laki_laki)
## Call:
## polr(formula = as.formula(formula_str_laki_laki), data = data_model_laki_laki, 
##     Hess = TRUE)
## 
## Coefficients:
##          Value Std. Error    t value
## LD1  1.0803591     0.1293  8.3532776
## LD2 -0.7905567     0.1408 -5.6140179
## LD3 -0.1778049     0.1614 -1.1014114
## LD4  0.2262238     0.1534  1.4751359
## LD5 -0.2026329     0.1752 -1.1567182
## LD6  0.1449148     0.1923  0.7537740
## LD7  0.1003944     0.1712  0.5864703
## LD8  0.0001511     0.1918  0.0007881
## 
## Intercepts:
##     Value   Std. Error t value
## 1|2 -6.9161  0.9574    -7.2237
## 2|3 -3.9359  0.5025    -7.8322
## 3|4 -2.0117  0.3690    -5.4516
## 4|5 -0.7745  0.3125    -2.4787
## 5|6  0.6818  0.2992     2.2785
## 6|7  1.8571  0.3386     5.4847
## 7|8  2.9469  0.4070     7.2411
## 8|9  4.4349  0.5187     8.5506
## 
## Residual Deviance: 301.1963 
## AIC: 333.1963
# Uji Likelihood Ratio untuk godness of fit (Laki-laki)
null_model_laki_laki <- polr(as.formula(paste(dependent_var, "~ 1")), data = cbind(clean_data_laki_laki[dependent_var], d_scores_laki_laki), Hess = TRUE)
lr_test_laki_laki <- anova(null_model_laki_laki, logistic_model_laki_laki)
cat("\nUji Likelihood Ratio (Laki-laki):\n")
## 
## Uji Likelihood Ratio (Laki-laki):
print(lr_test_laki_laki)
## Likelihood ratio tests of ordinal regression models
## 
## Response: Usia
##                                           Model Resid. df Resid. Dev   Test
## 1                                             1        91   419.4027       
## 2 LD1 + LD2 + LD3 + LD4 + LD5 + LD6 + LD7 + LD8        83   301.1963 1 vs 2
##      Df LR stat. Pr(Chi)
## 1                       
## 2     8 118.2064       0
# Uji signifikan parsial (wald) untuk Laki-laki
coeftest_result_laki_laki <- coeftest(logistic_model_laki_laki)
cat("\nUji Signifikansi Parsial (Wald) (Laki-laki):\n")
## 
## Uji Signifikansi Parsial (Wald) (Laki-laki):
print(coeftest_result_laki_laki)
## 
## t test of coefficients:
## 
##        Estimate  Std. Error t value  Pr(>|t|)    
## LD1  1.08035907  0.12933355  8.3533 1.284e-12 ***
## LD2 -0.79055670  0.14081834 -5.6140 2.554e-07 ***
## LD3 -0.17780489  0.16143367 -1.1014    0.2739    
## LD4  0.22622383  0.15335796  1.4751    0.1440    
## LD5 -0.20263288  0.17517912 -1.1567    0.2507    
## LD6  0.14491482  0.19225235  0.7538    0.4531    
## LD7  0.10039442  0.17118416  0.5865    0.5592    
## LD8  0.00015113  0.19178169  0.0008    0.9994    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Odds ratio untuk Laki-laki
odds_ratios_laki_laki <- exp(coef(logistic_model_laki_laki))
cat("\nOdds Ratios (Laki-laki):\n")
## 
## Odds Ratios (Laki-laki):
print(odds_ratios_laki_laki)
##       LD1       LD2       LD3       LD4       LD5       LD6       LD7       LD8 
## 2.9457371 0.4535922 0.8371057 1.2538563 0.8165780 1.1559411 1.1056069 1.0001511
# Prediksi dan Evaluasi Model untuk Perempuan
predictions_perempuan <- predict(logistic_model_perempuan, newdata = cbind(clean_data_perempuan[dependent_var], d_scores_perempuan), type = "class")
actual_perempuan <- clean_data_perempuan[[dependent_var]]
confusion_matrix_perempuan <- table(Prediction = predictions_perempuan, Actual = actual_perempuan)

cat("\nConfusion Matrix (Perempuan):\n")
## 
## Confusion Matrix (Perempuan):
print(confusion_matrix_perempuan)
##           Actual
## Prediction 2 3 4 5 6 7 8 9
##          2 7 3 1 0 0 1 0 0
##          3 3 3 1 2 1 1 0 0
##          4 0 0 0 0 0 0 0 0
##          5 1 3 1 4 2 0 0 2
##          6 0 0 0 0 0 0 0 0
##          7 0 0 0 1 0 0 1 0
##          8 0 0 0 2 2 5 8 3
##          9 0 0 0 0 0 0 3 6
accuracy_perempuan <- sum(diag(confusion_matrix_perempuan)) / sum(confusion_matrix_perempuan)
cat("\nAkurasi (Perempuan):\n")
## 
## Akurasi (Perempuan):
print(accuracy_perempuan)
## [1] 0.4179104
# Prediksi dan Evaluasi Model untuk Laki-laki
predictions_laki_laki <- predict(logistic_model_laki_laki, newdata = cbind(clean_data_laki_laki[dependent_var], d_scores_laki_laki), type = "class")
actual_laki_laki <- clean_data_laki_laki[[dependent_var]]
confusion_matrix_laki_laki <- table(Prediction = predictions_laki_laki, Actual = actual_laki_laki)

cat("\nConfusion Matrix (Laki-laki):\n")
## 
## Confusion Matrix (Laki-laki):
print(confusion_matrix_laki_laki)
##           Actual
## Prediction  1  2  3  4  5  6  7  8  9
##          1  0  1  0  0  0  0  0  0  0
##          2  1  4  0  1  0  0  0  0  0
##          3  1  6 11  2  3  3  0  0  0
##          4  0  0  2  1  1  0  0  0  0
##          5  0  0  1  6  7  5  2  1  0
##          6  0  0  0  1  5  3  0  0  0
##          7  0  0  0  0  0  2  5  2  3
##          8  0  0  0  0  1  0  2  4  4
##          9  0  0  0  0  0  0  1  5  2
accuracy_laki_laki <- sum(diag(confusion_matrix_laki_laki)) / sum(confusion_matrix_laki_laki)
cat("\nAkurasi (Laki-laki):\n")
## 
## Akurasi (Laki-laki):
print(accuracy_laki_laki)
## [1] 0.3737374
# Cut-off analysis untuk Perempuan
roc_list_perempuan <- lapply(desc_vars_perempuan, function(var) {
  roc(clean_data_perempuan[[dependent_var]], clean_data_perempuan[[var]])
})
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
for (i in 1:length(roc_list_perempuan)) {
  var_name <- desc_vars_perempuan[i]
  roc_curve <- roc_list_perempuan[[i]]
  cut_off <- median(clean_data_perempuan[[var_name]], na.rm = TRUE)
  
  plot <- ggroc(roc_curve) +
    geom_vline(xintercept = 1 - roc_curve$specificities[which.max(roc_curve$sensitivities)], linetype = "dashed") +
    geom_hline(yintercept = roc_curve$sensitivities[which.max(roc_curve$sensitivities)], linetype = "dashed") +
    annotate("text", x = 1 - roc_curve$specificities[which.max(roc_curve$sensitivities)], 
             y = roc_curve$sensitivities[which.max(roc_curve$sensitivities)], 
             label = paste(cut_off, "\n(Sens:", round(roc_curve$sensitivities[which.max(roc_curve$sensitivities)], 3), 
                           ", Spec:", round(roc_curve$specificities[which.max(roc_curve$sensitivities)], 3), ")"), 
             hjust = 1, vjust = -1) +
    labs(title = var_name, x = "1 - Specificity", y = "Sensitivity")
  
  # Display the plot
  print(plot)
}

# Cut-off analysis untuk Laki-laki
roc_list_laki_laki <- lapply(desc_vars_laki_laki, function(var) {
  roc(clean_data_laki_laki[[dependent_var]], clean_data_laki_laki[[var]])
})
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
for (i in 1:length(roc_list_laki_laki)) {
  var_name <- desc_vars_laki_laki[i]
  roc_curve <- roc_list_laki_laki[[i]]
  cut_off <- median(clean_data_laki_laki[[var_name]], na.rm = TRUE)
  
  plot <- ggroc(roc_curve) +
    geom_vline(xintercept = 1 - roc_curve$specificities[which.max(roc_curve$sensitivities)], linetype = "dashed") +
    geom_hline(yintercept = roc_curve$sensitivities[which.max(roc_curve$sensitivities)], linetype = "dashed") +
    annotate("text", x = 1 - roc_curve$specificities[which.max(roc_curve$sensitivities)], 
             y = roc_curve$sensitivities[which.max(roc_curve$sensitivities)], 
             label = paste(cut_off, "\n(Sens:", round(roc_curve$sensitivities[which.max(roc_curve$sensitivities)], 3), 
                           ", Spec:", round(roc_curve$specificities[which.max(roc_curve$sensitivities)], 3), ")"), 
             hjust = 1, vjust = -1) +
    labs(title = var_name, x = "1 - Specificity", y = "Sensitivity")
  
  # Display the plot
  
  print(plot)
}

# Menggabungkan ROC untuk perempuan dan laki-laki
library(ggplot2)
library(pROC)

# Cut-off analysis untuk Perempuan
roc_list_perempuan <- lapply(desc_vars_perempuan, function(var) {
  roc(clean_data_perempuan[[dependent_var]], clean_data_perempuan[[var]])
})
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls > cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
## Warning in roc.default(clean_data_perempuan[[dependent_var]],
## clean_data_perempuan[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 2, case = 3
## Setting direction: controls < cases
# Cut-off analysis untuk Laki-laki
roc_list_laki_laki <- lapply(desc_vars_laki_laki, function(var) {
  roc(clean_data_laki_laki[[dependent_var]], clean_data_laki_laki[[var]])
})
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
## Warning in roc.default(clean_data_laki_laki[[dependent_var]],
## clean_data_laki_laki[[var]]): 'response' has more than two levels. Consider
## setting 'levels' explicitly or using 'multiclass.roc' instead
## Setting levels: control = 1, case = 2
## Setting direction: controls > cases
# Membuat plot untuk setiap variabel
for (i in 1:length(roc_list_perempuan)) {
  var_name <- desc_vars_perempuan[i]
  
  # ROC untuk perempuan
  roc_curve_perempuan <- roc_list_perempuan[[i]]
  cut_off_perempuan <- median(clean_data_perempuan[[var_name]], na.rm = TRUE)
  
  # ROC untuk laki-laki
  roc_curve_laki_laki <- roc_list_laki_laki[[i]]
  cut_off_laki_laki <- median(clean_data_laki_laki[[var_name]], na.rm = TRUE)
  
  # Dataframe untuk plot
  plot_data_perempuan <- data.frame(
    Specificity = 1 - roc_curve_perempuan$specificities,
    Sensitivity = roc_curve_perempuan$sensitivities,
    Gender = "Perempuan"
  )
  
  plot_data_laki_laki <- data.frame(
    Specificity = 1 - roc_curve_laki_laki$specificities,
    Sensitivity = roc_curve_laki_laki$sensitivities,
    Gender = "Laki-laki"
  )
  
  plot_data <- rbind(plot_data_perempuan, plot_data_laki_laki)
  
  # Membuat plot ROC
  plot <- ggplot(plot_data, aes(x = Specificity, y = Sensitivity, color = Gender)) +
    geom_line() +
    geom_vline(xintercept = 1 - roc_curve_perempuan$specificities[which.max(roc_curve_perempuan$sensitivities)], linetype = "dashed", color = "red") +
    geom_hline(yintercept = roc_curve_perempuan$sensitivities[which.max(roc_curve_perempuan$sensitivities)], linetype = "dashed", color = "red") +
    geom_vline(xintercept = 1 - roc_curve_laki_laki$specificities[which.max(roc_curve_laki_laki$sensitivities)], linetype = "dashed", color = "blue") +
    geom_hline(yintercept = roc_curve_laki_laki$sensitivities[which.max(roc_curve_laki_laki$sensitivities)], linetype = "dashed", color = "blue") +
    annotate("text", x = 1 - roc_curve_perempuan$specificities[which.max(roc_curve_perempuan$sensitivities)], 
             y = roc_curve_perempuan$sensitivities[which.max(roc_curve_perempuan$sensitivities)], 
             label = paste(cut_off_perempuan, "\n(Sens:", round(roc_curve_perempuan$sensitivities[which.max(roc_curve_perempuan$sensitivities)], 3), 
                           ", Spec:", round(roc_curve_perempuan$specificities[which.max(roc_curve_perempuan$sensitivities)], 3), ")"), 
             hjust = 1, vjust = -1, color = "red") +
    annotate("text", x = 1 - roc_curve_laki_laki$specificities[which.max(roc_curve_laki_laki$sensitivities)], 
             y = roc_curve_laki_laki$sensitivities[which.max(roc_curve_laki_laki$sensitivities)], 
             label = paste(cut_off_laki_laki, "\n(Sens:", round(roc_curve_laki_laki$sensitivities[which.max(roc_curve_laki_laki$sensitivities)], 3), 
                           ", Spec:", round(roc_curve_laki_laki$specificities[which.max(roc_curve_laki_laki$sensitivities)], 3), ")"), 
             hjust = 1, vjust = -1, color = "blue") +
    labs(title = var_name, x = "1 - Specificity", y = "Sensitivity") +
    scale_color_manual(values = c("Perempuan" = "red", "Laki-laki" = "blue"))
  
  # Display the plot
  print(plot)
}

# Kesimpulan
# Menghubungkan hasil regresi dengan kontribusi variabel asli
contributions_df_perempuan <- data.frame(
  Discriminant = colnames(d_scores_perempuan),
  Variable = top_contributors_perempuan,
  Estimate = coef(logistic_model_perempuan),
  Std.Error = sqrt(diag(vcov(logistic_model_perempuan))),
  z.value = coef(logistic_model_perempuan) / sqrt(diag(vcov(logistic_model_perempuan))),
  p.value = 2 * (1 - pnorm(abs(coef(logistic_model_perempuan) / sqrt(diag(vcov(logistic_model_perempuan))))))
)
## Warning in data.frame(Discriminant = colnames(d_scores_perempuan), Variable =
## top_contributors_perempuan, : row names were found from a short variable and
## have been discarded
contributions_df_perempuan$Significance <- ifelse(contributions_df_perempuan$p.value < 0.001, "Signifikan", "Tidak Signifikan")

cat("\nKesimpulan (Perempuan):\n")
## 
## Kesimpulan (Perempuan):
print(contributions_df_perempuan)
##    Discriminant Variable    Estimate Std.Error     z.value      p.value
## 1           LD1   MRB_D_  0.61352265 0.1123462  5.46099916 4.734623e-08
## 2           LD2   MRH_D_ -0.93021279 0.1670842 -5.56732963 2.586728e-08
## 3           LD3   MRB_S_  0.78668671 0.1801909  4.36585245 1.266279e-05
## 4           LD4   MrB_D_  0.45077718 0.1891457  2.38322728 1.716160e-02
## 5           LD5   CRH_D_ -0.45262776 0.2292044 -1.97477792 4.829336e-02
## 6           LD6   MRH_S_ -0.42589430 0.1955296 -2.17815800 2.939428e-02
## 7           LD7   MRB_S_ -0.01836564 0.2142880 -0.08570538 9.317006e-01
## 8           LD1   MRB_D_  0.61352265 0.5563929  1.10267880 2.701667e-01
## 9           LD2   MRH_D_ -0.93021279 0.4534362 -2.05147455 4.022076e-02
## 10          LD3   MRB_S_  0.78668671 0.4284008  1.83633356 6.630834e-02
## 11          LD4   MrB_D_  0.45077718 0.3954850  1.13980862 2.543660e-01
## 12          LD5   CRH_D_ -0.45262776 0.4068891 -1.11241050 2.659617e-01
## 13          LD6   MRH_S_ -0.42589430 0.4424872 -0.96250081 3.357981e-01
## 14          LD7   MRB_S_ -0.01836564 0.5440165 -0.03375933 9.730691e-01
##        Significance
## 1        Signifikan
## 2        Signifikan
## 3        Signifikan
## 4  Tidak Signifikan
## 5  Tidak Signifikan
## 6  Tidak Signifikan
## 7  Tidak Signifikan
## 8  Tidak Signifikan
## 9  Tidak Signifikan
## 10 Tidak Signifikan
## 11 Tidak Signifikan
## 12 Tidak Signifikan
## 13 Tidak Signifikan
## 14 Tidak Signifikan
contributions_df_laki_laki <- data.frame(
  Discriminant = colnames(d_scores_laki_laki),
  Variable = top_contributors_laki_laki,
  Estimate = coef(logistic_model_laki_laki),
  Std.Error = sqrt(diag(vcov(logistic_model_laki_laki))),
  z.value = coef(logistic_model_laki_laki) / sqrt(diag(vcov(logistic_model_laki_laki))),
  p.value = 2 * (1 - pnorm(abs(coef(logistic_model_laki_laki) / sqrt(diag(vcov(logistic_model_laki_laki))))))
)
## Warning in data.frame(Discriminant = colnames(d_scores_laki_laki), Variable =
## top_contributors_laki_laki, : row names were found from a short variable and
## have been discarded
contributions_df_laki_laki$Significance <- ifelse(contributions_df_laki_laki$p.value < 0.001, "Signifikan", "Tidak Signifikan")

cat("\nKesimpulan (Laki-laki):\n")
## 
## Kesimpulan (Laki-laki):
print(contributions_df_laki_laki)
##    Discriminant Variable      Estimate Std.Error       z.value      p.value
## 1           LD1   MBH_S_  1.0803590747 0.1293336  8.3532776491 0.000000e+00
## 2           LD2   MrB_S_ -0.7905566971 0.1408183 -5.6140179489 1.976817e-08
## 3           LD3   CRH_D_ -0.1778048929 0.1614337 -1.1014114233 2.707176e-01
## 4           LD4   CRH_D_  0.2262238312 0.1533580  1.4751358818 1.401760e-01
## 5           LD5   MRB_D_ -0.2026328764 0.1751791 -1.1567182237 2.473875e-01
## 6           LD6   MRH_D_  0.1449148210 0.1922524  0.7537739894 4.509849e-01
## 7           LD7   CRH_D_  0.1003944158 0.1711842  0.5864702534 5.575595e-01
## 8           LD8   MRB_S_  0.0001511338 0.1917817  0.0007880510 9.993712e-01
## 9           LD1   MBH_S_  1.0803590747 0.9574196  1.1284071380 2.591480e-01
## 10          LD2   MrB_S_ -0.7905566971 0.5025240 -1.5731718808 1.156790e-01
## 11          LD3   CRH_D_ -0.1778048929 0.3690168 -0.4818341212 6.299238e-01
## 12          LD4   CRH_D_  0.2262238312 0.3124772  0.7239690915 4.690847e-01
## 13          LD5   MRB_D_ -0.2026328764 0.2992184 -0.6772073198 4.982744e-01
## 14          LD6   MRH_D_  0.1449148210 0.3385924  0.4279920053 6.686569e-01
## 15          LD7   CRH_D_  0.1003944158 0.4069656  0.2466901949 8.051480e-01
## 16          LD8   MRB_S_  0.0001511338 0.5186669  0.0002913889 9.997675e-01
##        Significance
## 1        Signifikan
## 2        Signifikan
## 3  Tidak Signifikan
## 4  Tidak Signifikan
## 5  Tidak Signifikan
## 6  Tidak Signifikan
## 7  Tidak Signifikan
## 8  Tidak Signifikan
## 9  Tidak Signifikan
## 10 Tidak Signifikan
## 11 Tidak Signifikan
## 12 Tidak Signifikan
## 13 Tidak Signifikan
## 14 Tidak Signifikan
## 15 Tidak Signifikan
## 16 Tidak Signifikan
# Menyediakan ringkasan dengan estimasi parameter untuk Perempuan
summary_table_perempuan <- summary(logistic_model_perempuan)
coefficients_perempuan <- summary_table_perempuan$coefficients[1:length(colnames(d_scores_perempuan)), ]
std_errors_perempuan <- coefficients_perempuan[, "Std. Error"]
z_values_perempuan <- coefficients_perempuan[, "t value"]
p_values_perempuan <- 2 * (1 - pnorm(abs(z_values_perempuan)))

# Menyediakan ringkasan dengan estimasi parameter untuk Laki-laki
summary_table_laki_laki <- summary(logistic_model_laki_laki)
coefficients_laki_laki <- summary_table_laki_laki$coefficients[1:length(colnames(d_scores_laki_laki)), ]
std_errors_laki_laki <- coefficients_laki_laki[, "Std. Error"]
z_values_laki_laki <- coefficients_laki_laki[, "t value"]
p_values_laki_laki <- 2 * (1 - pnorm(abs(z_values_laki_laki)))

# Sesuaikan dengan nama variabel asli
variable_names_perempuan <- top_contributors_perempuan
variable_names_laki_laki <- top_contributors_laki_laki

results_df_perempuan <- data.frame(
  Discriminant = colnames(d_scores_perempuan),
  Variable = variable_names_perempuan,
  Estimate = coefficients_perempuan[, "Value"],
  Std.Error = std_errors_perempuan,
  z.value = z_values_perempuan,
  p.value = p_values_perempuan
)

cat("\nEstimasi Parameter, Std. Error, dan P-value (Perempuan):\n")
## 
## Estimasi Parameter, Std. Error, dan P-value (Perempuan):
print(results_df_perempuan)
##     Discriminant Variable    Estimate Std.Error     z.value      p.value
## LD1          LD1   MRB_D_  0.61352265 0.1123462  5.46099916 4.734623e-08
## LD2          LD2   MRH_D_ -0.93021279 0.1670842 -5.56732963 2.586728e-08
## LD3          LD3   MRB_S_  0.78668671 0.1801909  4.36585245 1.266279e-05
## LD4          LD4   MrB_D_  0.45077718 0.1891457  2.38322728 1.716160e-02
## LD5          LD5   CRH_D_ -0.45262776 0.2292044 -1.97477792 4.829336e-02
## LD6          LD6   MRH_S_ -0.42589430 0.1955296 -2.17815800 2.939428e-02
## LD7          LD7   MRB_S_ -0.01836564 0.2142880 -0.08570538 9.317006e-01
results_df_laki_laki <- data.frame(
  Discriminant = colnames(d_scores_laki_laki),
  Variable = variable_names_laki_laki,
  Estimate = coefficients_laki_laki[, "Value"],
  Std.Error = std_errors_laki_laki,
  z.value = z_values_laki_laki,
  p.value = p_values_laki_laki
)

cat("\nEstimasi Parameter, Std. Error, dan P-value (Laki-laki):\n")
## 
## Estimasi Parameter, Std. Error, dan P-value (Laki-laki):
print(results_df_laki_laki)
##     Discriminant Variable      Estimate Std.Error      z.value      p.value
## LD1          LD1   MBH_S_  1.0803590747 0.1293336  8.353277649 0.000000e+00
## LD2          LD2   MrB_S_ -0.7905566971 0.1408183 -5.614017949 1.976817e-08
## LD3          LD3   CRH_D_ -0.1778048929 0.1614337 -1.101411423 2.707176e-01
## LD4          LD4   CRH_D_  0.2262238312 0.1533580  1.475135882 1.401760e-01
## LD5          LD5   MRB_D_ -0.2026328764 0.1751791 -1.156718224 2.473875e-01
## LD6          LD6   MRH_D_  0.1449148210 0.1922524  0.753773989 4.509849e-01
## LD7          LD7   CRH_D_  0.1003944158 0.1711842  0.586470253 5.575595e-01
## LD8          LD8   MRB_S_  0.0001511338 0.1917817  0.000788051 9.993712e-01
# Kesimpulan untuk Perempuan
kesimpulan_perempuan <- contributions_df_perempuan %>%
  group_by(Variable) %>%
  summarise(Significance = list(Significance), 
            Estimate = list(Estimate))

cat("\nKesimpulan (Perempuan):\n")
## 
## Kesimpulan (Perempuan):
for (i in 1:nrow(kesimpulan_perempuan)) {
  var <- kesimpulan_perempuan$Variable[i]
  signifikan_ld <- contributions_df_perempuan %>% filter(Variable == var & Significance == "Signifikan") %>% select(Discriminant, Estimate)
  tidak_signifikan_ld <- contributions_df_perempuan %>% filter(Variable == var & Significance == "Tidak Signifikan") %>% select(Discriminant, Estimate)
  
  cat(var, ":\n")
  if (nrow(signifikan_ld) > 0) {
    cat("  Signifikan di: \n")
    for (j in 1:nrow(signifikan_ld)) {
      cat("    ", signifikan_ld$Discriminant[j], "dengan estimasi parameter", signifikan_ld$Estimate[j], "\n")
    }
  }
  if (nrow(tidak_signifikan_ld) > 0) {
    cat("  Tidak Signifikan di: \n")
    for (j in 1:nrow(tidak_signifikan_ld)) {
      cat("    ", tidak_signifikan_ld$Discriminant[j], "dengan estimasi parameter", tidak_signifikan_ld$Estimate[j], "\n")
    }
  }
}
## CRH_D_ :
##   Tidak Signifikan di: 
##      LD5 dengan estimasi parameter -0.4526278 
##      LD5 dengan estimasi parameter -0.4526278 
## MRB_D_ :
##   Signifikan di: 
##      LD1 dengan estimasi parameter 0.6135226 
##   Tidak Signifikan di: 
##      LD1 dengan estimasi parameter 0.6135226 
## MRB_S_ :
##   Signifikan di: 
##      LD3 dengan estimasi parameter 0.7866867 
##   Tidak Signifikan di: 
##      LD7 dengan estimasi parameter -0.01836564 
##      LD3 dengan estimasi parameter 0.7866867 
##      LD7 dengan estimasi parameter -0.01836564 
## MRH_D_ :
##   Signifikan di: 
##      LD2 dengan estimasi parameter -0.9302128 
##   Tidak Signifikan di: 
##      LD2 dengan estimasi parameter -0.9302128 
## MRH_S_ :
##   Tidak Signifikan di: 
##      LD6 dengan estimasi parameter -0.4258943 
##      LD6 dengan estimasi parameter -0.4258943 
## MrB_D_ :
##   Tidak Signifikan di: 
##      LD4 dengan estimasi parameter 0.4507772 
##      LD4 dengan estimasi parameter 0.4507772
# Mengambil estimasi parameter yang signifikan saja untuk Perempuan
selected_estimates_perempuan <- contributions_df_perempuan %>%
  filter(Significance == "Signifikan") %>%
  select(Variable, Discriminant, Estimate)

cat("\nEstimasi Parameter yang Diambil (Perempuan):\n")
## 
## Estimasi Parameter yang Diambil (Perempuan):
print(selected_estimates_perempuan)
##   Variable Discriminant   Estimate
## 1   MRB_D_          LD1  0.6135226
## 2   MRH_D_          LD2 -0.9302128
## 3   MRB_S_          LD3  0.7866867
# Kesimpulan untuk Laki-laki
kesimpulan_laki_laki <- contributions_df_laki_laki %>%
  group_by(Variable) %>%
  summarise(Significance = list(Significance), 
            Estimate = list(Estimate))

cat("\nKesimpulan (Laki-laki):\n")
## 
## Kesimpulan (Laki-laki):
for (i in 1:nrow(kesimpulan_laki_laki)) {
  var <- kesimpulan_laki_laki$Variable[i]
  signifikan_ld <- contributions_df_laki_laki %>% filter(Variable == var & Significance == "Signifikan") %>% select(Discriminant, Estimate)
  tidak_signifikan_ld <- contributions_df_laki_laki %>% filter(Variable == var & Significance == "Tidak Signifikan") %>% select(Discriminant, Estimate)
  
  cat(var, ":\n")
  if (nrow(signifikan_ld) > 0) {
    cat("  Signifikan di: \n")
    for (j in 1:nrow(signifikan_ld)) {
      cat("    ", signifikan_ld$Discriminant[j], "dengan estimasi parameter", signifikan_ld$Estimate[j], "\n")
    }
  }
  if (nrow(tidak_signifikan_ld) > 0) {
    cat("  Tidak Signifikan di: \n")
    for (j in 1:nrow(tidak_signifikan_ld)) {
      cat("    ", tidak_signifikan_ld$Discriminant[j], "dengan estimasi parameter", tidak_signifikan_ld$Estimate[j], "\n")
    }
  }
}
## CRH_D_ :
##   Tidak Signifikan di: 
##      LD3 dengan estimasi parameter -0.1778049 
##      LD4 dengan estimasi parameter 0.2262238 
##      LD7 dengan estimasi parameter 0.1003944 
##      LD3 dengan estimasi parameter -0.1778049 
##      LD4 dengan estimasi parameter 0.2262238 
##      LD7 dengan estimasi parameter 0.1003944 
## MBH_S_ :
##   Signifikan di: 
##      LD1 dengan estimasi parameter 1.080359 
##   Tidak Signifikan di: 
##      LD1 dengan estimasi parameter 1.080359 
## MRB_D_ :
##   Tidak Signifikan di: 
##      LD5 dengan estimasi parameter -0.2026329 
##      LD5 dengan estimasi parameter -0.2026329 
## MRB_S_ :
##   Tidak Signifikan di: 
##      LD8 dengan estimasi parameter 0.0001511338 
##      LD8 dengan estimasi parameter 0.0001511338 
## MRH_D_ :
##   Tidak Signifikan di: 
##      LD6 dengan estimasi parameter 0.1449148 
##      LD6 dengan estimasi parameter 0.1449148 
## MrB_S_ :
##   Signifikan di: 
##      LD2 dengan estimasi parameter -0.7905567 
##   Tidak Signifikan di: 
##      LD2 dengan estimasi parameter -0.7905567
# Mengambil estimasi parameter yang signifikan saja untuk Laki-laki
selected_estimates_laki_laki <- contributions_df_laki_laki %>%
  filter(Significance == "Signifikan") %>%
  select(Variable, Discriminant, Estimate)

cat("\nEstimasi Parameter yang Diambil (Laki-laki):\n")
## 
## Estimasi Parameter yang Diambil (Laki-laki):
print(selected_estimates_laki_laki)
##   Variable Discriminant   Estimate
## 1   MBH_S_          LD1  1.0803591
## 2   MrB_S_          LD2 -0.7905567
# Menggabungkan variabel yang sama dan mengambil estimasi dengan p-value paling rendah untuk Perempuan
selected_estimates_perempuan <- contributions_df_perempuan %>%
  filter(Significance == "Signifikan") %>%
  group_by(Variable) %>%
  arrange(p.value) %>%
  slice(1) %>%
  select(Variable, Discriminant, Estimate, p.value)

cat("\nEstimasi Parameter yang Diambil (Perempuan):\n")
## 
## Estimasi Parameter yang Diambil (Perempuan):
print(selected_estimates_perempuan)
## # A tibble: 3 × 4
## # Groups:   Variable [3]
##   Variable Discriminant Estimate      p.value
##   <chr>    <chr>           <dbl>        <dbl>
## 1 MRB_D_   LD1             0.614 0.0000000473
## 2 MRB_S_   LD3             0.787 0.0000127   
## 3 MRH_D_   LD2            -0.930 0.0000000259
# Menggabungkan variabel yang sama dan mengambil estimasi dengan p-value paling rendah untuk Laki-laki
selected_estimates_laki_laki <- contributions_df_laki_laki %>%
  filter(Significance == "Signifikan") %>%
  group_by(Variable) %>%
  arrange(p.value) %>%
  slice(1) %>%
  select(Variable, Discriminant, Estimate, p.value)

cat("\nEstimasi Parameter yang Diambil (Laki-laki):\n")
## 
## Estimasi Parameter yang Diambil (Laki-laki):
print(selected_estimates_laki_laki)
## # A tibble: 2 × 4
## # Groups:   Variable [2]
##   Variable Discriminant Estimate      p.value
##   <chr>    <chr>           <dbl>        <dbl>
## 1 MBH_S_   LD1             1.08  0           
## 2 MrB_S_   LD2            -0.791 0.0000000198
library(ggplot2)
library(dplyr)
library(readxl)

# Memastikan nama kolom yang benar
colnames(data) <- c("Usia", "Jenis_Kelamin", colnames(data)[3:ncol(data)])

# Menggabungkan data laki-laki dan perempuan
data_combined <- bind_rows(
  data %>% filter(Jenis_Kelamin == "Perempuan"),
  data %>% filter(Jenis_Kelamin == "Laki-laki")
)

# Memetakan kategori usia
map_usia_category <- function(usia) {
  if (usia == 1) {
    return("6-10 tahun")
  } else if (usia == 2) {
    return("11-15 tahun")
  } else if (usia == 3) {
    return("16-20 tahun")
  } else if (usia == 4) {
    return("21-25 tahun")
  } else if (usia == 5) {
    return("26-30 tahun")
  } else if (usia == 6) {
    return("31-35 tahun")
  } else if (usia == 7) {
    return("36-40 tahun")
  } else if (usia == 8) {
    return("41-45 tahun")
  } else if (usia == 9) {
    return("46-50 tahun")
  } else {
    return("Unknown")
  }
}

data_combined <- data_combined %>%
  mutate(Kategori_Usia = sapply(Usia, map_usia_category))

category_order <- c("6-10 tahun", "11-15 tahun", "16-20 tahun", "21-25 tahun",
                    "26-30 tahun", "31-35 tahun", "36-40 tahun", "41-45 tahun", "46-50 tahun")

data_combined$Kategori_Usia <- factor(data_combined$Kategori_Usia, levels = category_order, ordered = TRUE)

usia_count_combined <- data_combined %>%
  group_by(Kategori_Usia, Jenis_Kelamin) %>%
  summarise(Frekuensi = n(), .groups = 'drop')

# Membuat barplot dengan ggplot2
barplot_combined <- ggplot(usia_count_combined, aes(x = Kategori_Usia, y = Frekuensi, fill = Jenis_Kelamin)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Frekuensi Kategori Usia Berdasarkan Jenis Kelamin", x = "Kategori Usia", y = "Frekuensi") +
  scale_fill_manual(values = c("Perempuan" = "pink", "Laki-laki" = "blue"))

print(barplot_combined)

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ dplyr::recode()  masks car::recode()
## ✖ plotly::select() masks MASS::select(), dplyr::select()
## ✖ purrr::some()    masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cutpointr)
## 
## Attaching package: 'cutpointr'
## 
## The following objects are masked from 'package:pROC':
## 
##     auc, roc
# Daftar variabel yang akan dianalisis
variables <- colnames(data)[3:ncol(data)]

# Fungsi untuk melakukan cut-point analysis pada setiap variabel
cutpoint_analysis <- function(var, data) {
  data <- data %>% drop_na(var)  # Menghapus baris yang mengandung NA pada variabel yang dianalisis
  median_data <- mean(data[[var]], na.rm = TRUE)
  data$class <- ifelse(data[[var]] > median_data, 1, 2)
  if (length(unique(data$class)) == 2) {  # Pastikan ada dua kelas
    opt_cut <- cutpointr(data, !!sym(var), class, method = minimize_metric, metric = misclassification_cost, na.rm = TRUE)
    cat("\nAnalisis untuk variabel:", var, "\n")
    print(summary(opt_cut))
    plot_obj = plot_metric(opt_cut)
    print(plot_obj)
    
    # Hitung p-value menggunakan uji statistik yang sesuai
    # Contoh: uji t-test untuk membandingkan rata-rata kedua kelompok
    p_value <- t.test(data[[var]] ~ data$class)$p.value
    cat("P-value:", p_value, "\n") 
  } else {
    cat("\nVariabel", var, "tidak memiliki dua kelas setelah pembagian berdasarkan median.\n")
  }
}

# Melakukan analisis untuk setiap variabel
for (var in variables) {
  cutpoint_analysis(var, data_perempuan)
}
## Assuming the positive class is 1
## Assuming the positive class has higher x values
## 
## Analisis untuk variabel: BB1 
## Method: minimize_metric 
## Predictor: BB1 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    62    42
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##             116.8                      0   1           1           1 62  0  0
##  tn
##  42
## 
## Predictor summary: 
##     Data  Min.      5% 1st Qu.  Median     Mean 3rd Qu.     95%  Max.       SD
##  Overall  95.0 102.990 112.000 118.400 116.7592 121.725 125.625 131.9 7.180112
##        1 116.8 117.300 119.725 121.155 121.6784 123.300 127.855 131.9 3.143271
##        2  95.0 100.835 107.575 111.200 109.4976 113.075 114.590 116.3 4.919820
##  NAs
##    0
##    0
##    0
## P-value: 2.188508e-21
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: BB2 
## Method: minimize_metric 
## Predictor: BB2 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    53    51
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              93.4                      0   1           1           1 53  0  0
##  tn
##  51
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95%  Max.       SD NAs
##  Overall 81.9 85.345  89.575   93.4 93.34904  96.625 100.95 106.4 5.100974   0
##        1 93.4 93.560  95.300   96.6 97.36415  98.600 103.82 106.4 2.995066   0
##        2 81.9 82.550  87.800   89.5 89.17647  91.350  93.10  93.3 3.069999   0
## P-value: 5.999011e-25
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: BB3 
## Method: minimize_metric 
## Predictor: BB3 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    56    48
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              92.4                      0   1           1           1 56  0  0
##  tn
##  48
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.     95%  Max.       SD NAs
##  Overall 72.4 78.775  89.900  92.75 92.31538  96.800 100.740 104.4 6.558841   0
##        1 92.4 92.575  94.775  96.65 96.88750  98.925 101.225 104.4 2.866297   0
##        2 72.4 74.260  84.975  89.45 86.98125  90.925  91.865  92.1 5.540284   0
## P-value: 4.86074e-17
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MA_D_ 
## Method: minimize_metric 
## Predictor: MA_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    55    49
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##             127.1                      0   1           1           1 55  0  0
##  tn
##  49
## 
## Predictor summary: 
##     Data  Min.      5% 1st Qu. Median     Mean 3rd Qu.    95%  Max.       SD
##  Overall 104.9 116.465 122.875 127.25 126.8713  130.25 137.00 141.0 6.421450
##        1 127.1 127.270 128.700 130.20 131.5836  134.05 138.97 141.0 3.688157
##        2 104.9 112.860 120.000 122.40 121.5820  125.10 126.20 126.7 4.379161
##  NAs
##    0
##    0
##    0
## P-value: 9.327152e-22
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MA_S_ 
## Method: minimize_metric 
## Predictor: MA_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    46    58
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##             128.1                      0   1           1           1 46  0  0
##  tn
##  58
## 
## Predictor summary: 
##     Data  Min.      5% 1st Qu. Median     Mean 3rd Qu.     95%  Max.        SD
##  Overall 104.9 118.900   122.4 126.50 128.0404  131.45 135.895 276.2 15.815511
##        1 128.1 128.325   130.1 132.10 135.1935  133.20 138.775 276.2 21.440350
##        2 104.9 117.790   120.2 122.95 122.3672  125.65 127.130 127.8  3.877664
##  NAs
##    0
##    0
##    0
## P-value: 0.0002171657
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: RL_D_ 
## Method: minimize_metric 
## Predictor: RL_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    58    46
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              52.5                      0   1           1           1 58  0  0
##  tn
##  46
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 36.7 41.020  48.425   52.8 52.45577  56.900 61.680 69.3 6.420520   0
##        1 52.5 52.685  53.875   56.4 56.91207  58.675 64.775 69.3 3.721963   0
##        2 36.7 39.675  43.475   47.4 46.83696  50.725 52.100 52.4 4.351978   0
## P-value: 2.940521e-21
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: RL_S_ 
## Method: minimize_metric 
## Predictor: RL_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    58    46
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              52.5                      0   1           1           1 58  0  0
##  tn
##  46
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 36.5 41.105  49.175  52.80 52.24904  56.525 60.80 65.1 6.016998   0
##        1 52.5 52.685  54.000  55.85 56.47414  58.275 62.42 65.1 3.172445   0
##        2 36.5 39.375  43.950  47.55 46.92174  50.275 52.00 52.2 4.254405   0
## P-value: 6.10901e-21
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: CRH_D_ 
## Method: minimize_metric 
## Predictor: CRH_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    58    46
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              57.2                      0   1           1           1 58  0  0
##  tn
##  46
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 41.5 45.28   53.55  57.85 56.55865  60.225 64.910 84.8 6.589615   0
##        1 57.2 57.57   58.40  60.10 61.04138  62.075 66.960 84.8 4.162575   0
##        2 41.5 43.00   47.55  51.20 50.90652  54.875 55.975 56.4 4.345849   0
## P-value: 8.64658e-21
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: CRH_S_ 
## Method: minimize_metric 
## Predictor: CRH_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    62    42
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              56.2                      0   1           1           1 62  0  0
##  tn
##  42
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 40.6 45.945  52.075  57.45 56.18365  60.200 63.155 68.5 5.729063   0
##        1 56.2 56.800  58.100  59.90 60.05161  61.725 64.190 68.5 2.547949   0
##        2 40.6 42.425  48.025  51.00 50.47381  54.150 55.895 56.1 4.095748   0
## P-value: 3.653715e-20
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: CL_D_ 
## Method: minimize_metric 
## Predictor: CL_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    60    44
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              55.2                      0   1           1           1 60  0  0
##  tn
##  44
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 39.3 43.395  51.400  56.20 54.78558  58.025 62.085 80.6 6.296068   0
##        1 55.2 55.680  56.775  57.95 58.96500  60.650 64.865 80.6 3.900449   0
##        2 39.3 42.005  46.000  49.65 49.08636  52.700 53.585 54.6 4.058158   0
## P-value: 2.289074e-21
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: CL_S_ 
## Method: minimize_metric 
## Predictor: CL_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    59    45
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##                55                      0   1           1           1 59  0  0
##  tn
##  45
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall   30 44.00  50.975   56.1 54.61154  58.325 60.97 84.3 6.622194   0
##        1   55 55.88  56.950   58.2 58.85085  59.800 61.87 84.3 4.067083   0
##        2   30 41.12  46.600   50.2 49.05333  53.100 54.20 54.3 5.015820   0
## P-value: 2.666421e-17
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: RH_D_ 
## Method: minimize_metric 
## Predictor: RH_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    54    50
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              47.1                      0   1           1           1 54  0  0
##  tn
##  50
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 31.8 33.805  40.775  47.20 46.88750  51.250 58.965 66.8 7.525280   0
##        1 47.1 47.330  50.200  51.15 52.69444  54.175 60.140 66.8 4.390187   0
##        2 31.8 32.090  37.925  40.65 40.61600  45.050 46.420 46.6 4.571132   0
## P-value: 8.841388e-25
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: RH_S_ 
## Method: minimize_metric 
## Predictor: RH_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    51    53
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              47.1                      0   1           1           1 51  0  0
##  tn
##  53
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 31.5 33.99  41.875  46.55 47.09808   51.00 59.67 83.1 8.256212   0
##        1 47.1 47.75  50.050  51.00 53.39216   54.95 62.90 83.1 6.407241   0
##        2 31.5 32.20  38.400  41.90 41.04151   44.50 46.08 46.6 4.393677   0
## P-value: 4.490518e-19
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: ML_D_ 
## Method: minimize_metric 
## Predictor: ML_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    60    44
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              70.8                      0   1           1           1 60  0  0
##  tn
##  44
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 53.1 58.335  66.950  72.05 70.78942  75.100 78.57 86.2 6.343698   0
##        1 70.8 71.280  72.775  74.30 75.11500  77.050 79.42 86.2 3.207767   0
##        2 53.1 57.545  62.150  66.35 64.89091  69.025 70.30 70.6 4.535074   0
## P-value: 2.444656e-20
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: ML_S_ 
## Method: minimize_metric 
## Predictor: ML_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    65    39
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              70.9                      0   1           1           1 65  0  0
##  tn
##  39
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 51.5 58.005   67.35   72.2 70.89038    75.2 78.54 82.7 6.025617   0
##        1 70.9 71.140   72.80   74.0 74.63077    76.1 79.02 82.7 2.644388   0
##        2 51.5 56.820   62.00   66.0 64.65641    68.6 70.21 70.5 4.777072   0
## P-value: 1.312187e-16
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MRH_D_ 
## Method: minimize_metric 
## Predictor: MRH_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    52    52
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              55.2                      0   1           1           1 52  0  0
##  tn
##  52
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 38.7 44.145  51.200  54.95 55.02115  59.525 65.41 82.8 6.881023   0
##        1 55.2 55.410  57.000  59.55 60.20000  61.850 67.31 82.8 4.759387   0
##        2 38.7 41.420  46.875  51.10 49.84231  53.425 54.60 54.7 4.275619   0
## P-value: 1.9593e-20
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MRH_S_ 
## Method: minimize_metric 
## Predictor: MRH_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    54    50
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              54.9                      0   1           1           1 54  0  0
##  tn
##  50
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 37.1 44.115  50.950  55.05 54.71538  58.000 64.710 80.4 6.516533   0
##        1 54.9 55.200  56.125  57.90 59.34074  61.575 65.645 80.4 4.552773   0
##        2 37.1 41.435  47.325  50.60 49.72000  52.975 54.510 54.7 4.219440   0
## P-value: 1.972345e-19
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MRB_D_ 
## Method: minimize_metric 
## Predictor: MRB_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    46    58
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              41.2                      0   1           1           1 46  0  0
##  tn
##  58
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 29.9 33.730  38.100  40.55 41.11154  43.800 48.695 65.5 5.693521   0
##        1 41.2 41.550  42.775  44.00 45.44565  45.150 60.225 65.5 5.450289   0
##        2 29.9 31.805  35.800  38.60 37.67414  40.075 40.715 41.1 2.818209   0
## P-value: 1.375304e-12
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MRB_S_ 
## Method: minimize_metric 
## Predictor: MRB_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    51    53
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              41.3                      0   1           1           1 51  0  0
##  tn
##  53
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 29.8 33.245    38.2  41.15 41.23894    43.2 47.67 65.4 5.493416   0
##        1 41.3 41.450    42.2  43.20 44.92647    44.6 59.05 65.4 5.160885   0
##        2 29.8 32.680    36.0  38.20 37.69057    40.0 41.10 41.2 2.827561   0
## P-value: 2.70065e-13
## Warning in mean.default(data[[var]], na.rm = TRUE): argument is not numeric or
## logical: returning NA
## 
## Variabel MrB_D_ tidak memiliki dua kelas setelah pembagian berdasarkan median.
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MrB_S_ 
## Method: minimize_metric 
## Predictor: MrB_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    50    54
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              31.6                      0   1           1           1 50  0  0
##  tn
##  54
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 24.9 26.915    29.2  31.10 31.41442  33.100 36.200 41.5 3.171977   0
##        1 31.6 31.900    32.4  33.10 33.95600  34.825 39.585 41.5 2.384830   0
##        2 24.9 26.095    28.2  29.35 29.06111  30.200 31.000 31.2 1.599577   0
## P-value: 2.432362e-20
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MBH_D_ 
## Method: minimize_metric 
## Predictor: MBH_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    62    42
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              27.7                      0   1           1           1 62  0  0
##  tn
##  42
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 18.8 20.70  25.800  28.20 27.64038  30.225 32.285 34.8 3.589805   0
##        1 27.7 27.81  28.800  29.85 30.01613  31.100 32.600 34.8 1.593758   0
##        2 18.8 20.00  21.475  25.10 24.13333  26.650 27.390 27.5 2.731360   0
## P-value: 1.804622e-18
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MBH_S_ 
## Method: minimize_metric 
## Predictor: MBH_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 104    61    43
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              27.8                      0   1           1           1 61  0  0
##  tn
##  43
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 19.1 21.415  25.775  28.35 27.71058   30.10 31.985 35.1 3.452196   0
##        1 27.8 28.000  28.900  29.90 30.07377   31.10 32.400 35.1 1.577752   0
##        2 19.1 20.440  22.000  25.00 24.35814   26.65 27.190 27.7 2.460278   0

## P-value: 1.603835e-20
library(tidyverse)
library(cutpointr)

# Daftar variabel yang akan dianalisis
variables <- colnames(data)[3:ncol(data)]

# Fungsi untuk melakukan cut-point analysis pada setiap variabel
cutpoint_analysis <- function(var, data) {
  data <- data %>% drop_na(var)  # Menghapus baris yang mengandung NA pada variabel yang dianalisis
  median_data <- mean(data[[var]], na.rm = TRUE)
  data$class <- ifelse(data[[var]] > median_data, 1, 2)
  if (length(unique(data$class)) == 2) {  # Pastikan ada dua kelas
    opt_cut <- cutpointr(data, !!sym(var), class, method = minimize_metric, metric = misclassification_cost, na.rm = TRUE)
    cat("\nAnalisis untuk variabel:", var, "\n")
    print(summary(opt_cut))
    plot_obj = plot_metric(opt_cut)
    print(plot_obj)
    
    # Hitung p-value menggunakan uji statistik yang sesuai
    # Contoh: uji t-test untuk membandingkan rata-rata kedua kelompok
    p_value <- t.test(data[[var]] ~ data$class)$p.value
    cat("P-value:", p_value, "\n") 
  } else {
    cat("\nVariabel", var, "tidak memiliki dua kelas setelah pembagian berdasarkan median.\n")
  }
}

# Melakukan analisis untuk setiap variabel
for (var in variables) {
  cutpoint_analysis(var, data_laki_laki)
}
## Assuming the positive class is 1
## Assuming the positive class has higher x values
## 
## Analisis untuk variabel: BB1 
## Method: minimize_metric 
## Predictor: BB1 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    71    69
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##             122.3                      0   1           1           1 71  0  0
##  tn
##  69
## 
## Predictor summary: 
##     Data  Min.      5% 1st Qu. Median     Mean 3rd Qu.    95%  Max.       SD
##  Overall 100.9 108.295 117.075  122.4 122.2147 127.325 133.45 138.6 7.718174
##        1 122.3 123.950 125.800  127.3 128.3980 130.000 135.25 138.6 3.597827
##        2 100.9 105.540 113.700  117.0 115.8522 119.900 121.60 122.1 5.238899
##  NAs
##    0
##    0
##    0
## P-value: 1.335152e-32
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: BB2 
## Method: minimize_metric 
## Predictor: BB2 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    73    67
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              97.9                      0   1           1           1 73  0  0
##  tn
##  67
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median      Mean 3rd Qu.     95%  Max.       SD
##  Overall 84.0 87.475  92.275   98.5  97.71643 101.925 106.505 124.8 6.341284
##        1 97.9 98.660 100.200  101.9 102.59589 104.000 107.480 124.8 3.880952
##        2 84.0 85.610  90.650   92.1  92.40000  96.250  97.100  97.7 3.637390
##  NAs
##    0
##    0
##    0
## P-value: 2.404664e-33
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: BB3 
## Method: minimize_metric 
## Predictor: BB3 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    34   106
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##               105                      0   1           1           1 34  0  0
##   tn
##  106
## 
## Predictor summary: 
##     Data  Min.      5% 1st Qu. Median      Mean 3rd Qu.     95%  Max.        SD
##  Overall  76.6  81.065  95.450  98.80 104.97643 104.800 113.225 971.0  74.18903
##        1 105.0 105.100 105.425 107.45 134.09706 112.250 117.090 971.0 147.92529
##        2  76.6  80.250  92.800  97.05  95.63585 100.775 103.550 104.8   6.81370
##  NAs
##    0
##    0
##    0
## P-value: 0.1391397
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MA_D_ 
## Method: minimize_metric 
## Predictor: MA_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    80    60
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##             125.2                      0   1           1           1 80  0  0
##  tn
##  60
## 
## Predictor summary: 
##     Data  Min.      5% 1st Qu. Median     Mean 3rd Qu.    95%  Max.        SD
##  Overall  29.7 111.970 120.275 126.65 124.6321 130.600 135.61 141.6 11.213560
##        1 125.2 125.700 127.775 129.95 130.7512 133.875 136.11 141.6  3.582482
##        2  29.7 100.395 115.475 119.60 116.4733 121.550 123.91 124.5 12.672576
##  NAs
##    0
##    0
##    0
## P-value: 3.671945e-12
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MA_S_ 
## Method: minimize_metric 
## Predictor: MA_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    79    61
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##             125.8                      0   1           1           1 79  0  0
##  tn
##  61
## 
## Predictor summary: 
##     Data  Min.      5% 1st Qu. Median     Mean 3rd Qu.     95%  Max.       SD
##  Overall 100.3 114.065  120.90  126.2 125.7293  130.80 135.725 145.3 7.625700
##        1 125.8 126.090  127.85  130.0 130.9848  133.95 137.370 145.3 3.985296
##        2 100.3 108.100  116.40  120.5 118.9230  122.80 125.200 125.6 5.532130
##  NAs
##    0
##    0
##    0
## P-value: 1.464366e-26
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: RL_D_ 
## Method: minimize_metric 
## Predictor: RL_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    74    66
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              58.2                      0   1           1           1 74  0  0
##  tn
##  66
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95%  Max.       SD NAs
##  Overall 39.9 44.090  53.975  58.35 58.18857  62.275 68.030 128.1 9.116260   0
##        1 58.2 58.365  59.825  61.95 63.60811  64.875 70.040 128.1 8.468795   0
##        2 39.9 41.475  48.525  53.55 52.11212  55.875 57.775  58.1 5.122725   0
## P-value: 3.679777e-17
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: RL_S_ 
## Method: minimize_metric 
## Predictor: RL_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    77    63
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              57.8                      0   1           1           1 77  0  0
##  tn
##  63
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 39.9 42.995   54.05   59.1 57.75500    61.9 67.12 88.6 7.317647   0
##        1 57.8 58.600   59.90   61.3 62.71688    64.4 68.96 88.6 4.507024   0
##        2 39.9 42.030   47.70   53.2 51.69048    56.1 57.29 57.7 5.215600   0
## P-value: 2.141742e-25
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: CRH_D_ 
## Method: minimize_metric 
## Predictor: CRH_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    76    64
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              62.3                      0   1           1           1 76  0  0
##  tn
##  64
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 43.5 48.995  57.925  63.40 62.21929  66.600 73.125 84.8 7.185176   0
##        1 62.3 63.125  64.550  66.40 67.35132  68.775 75.550 84.8 4.067153   0
##        2 43.5 48.075  52.275  57.15 56.12500  60.525 61.900 62.0 4.971091   0
## P-value: 3.701794e-28
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: CRH_S_ 
## Method: minimize_metric 
## Predictor: CRH_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    77    63
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              61.7                      0   1           1           1 77  0  0
##  tn
##  63
## 
## Predictor summary: 
##     Data Min.    5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 43.9 49.59   57.90   62.8 61.63286  66.175 70.515 78.8 6.638675   0
##        1 61.7 62.52   64.10   66.0 66.43377  68.300 71.120 78.8 3.107614   0
##        2 43.9 47.49   51.95   57.6 55.76508  60.150 60.990 61.5 4.832957   0
## P-value: 8.365934e-28
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: CL_D_ 
## Method: minimize_metric 
## Predictor: CL_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    79    61
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              60.7                      0   1           1           1 79  0  0
##  tn
##  61
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 42.1 48.145   56.75  61.55 60.62714   64.95 70.145 80.6 6.850141   0
##        1 60.7 61.190   62.95  64.50 65.37342   66.75 71.310 80.6 3.641981   0
##        2 42.1 46.700   50.70  55.30 54.48033   58.40 60.400 60.6 4.836453   0
## P-value: 1.792543e-27
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: CL_S_ 
## Method: minimize_metric 
## Predictor: CL_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    78    62
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              60.5                      0   1           1           1 78  0  0
##  tn
##  62
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 42.9 47.975  56.150  61.50 60.18929  64.900 68.61 84.3 6.830167   0
##        1 60.5 61.085  63.000  64.40 65.07436  66.450 69.42 84.3 3.521564   0
##        2 42.9 45.385  50.425  54.85 54.04355  57.975 59.69 60.0 4.663202   0
## P-value: 2.084898e-29
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: RH_D_ 
## Method: minimize_metric 
## Predictor: RH_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    61    79
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              52.4                      0   1           1           1 61  0  0
##  tn
##  79
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 29.3 36.865    45.5  51.45 52.30214   59.45 66.115 72.4 9.261877   0
##        1 52.4 54.400    56.7  61.00 60.98525   65.40 67.600 72.4 4.944486   0
##        2 29.3 35.440    43.1  47.10 45.59747   50.25 51.730 52.2 5.437806   0
## P-value: 2.094998e-36
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: RH_S_ 
## Method: minimize_metric 
## Predictor: RH_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    65    75
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              52.7                      0   1           1           1 65  0  0
##  tn
##  75
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 33.3 36.895  46.375  51.65 52.52786  60.125 66.205 83.1 9.163378   0
##        1 52.7 53.020  56.200  60.30 60.49538  64.000 67.880 83.1 5.498052   0
##        2 33.3 35.680  42.200  46.90 45.62267  50.050 51.830 52.5 5.243312   0
## P-value: 1.536512e-33
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: ML_D_ 
## Method: minimize_metric 
## Predictor: ML_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    75    65
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              74.4                      0   1           1           1 75  0  0
##  tn
##  65
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 44.7 59.785   71.45   75.1 74.39571   79.05 86.535 95.0 7.964118   0
##        1 74.4 75.010   76.00   78.5 79.82267   81.75 89.000 95.0 4.749782   0
##        2 44.7 57.440   64.20   71.1 68.13385   72.20 73.800 74.3 6.110065   0
## P-value: 1.826903e-23
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: ML_S_ 
## Method: minimize_metric 
## Predictor: ML_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    76    64
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              74.6                      0   1           1           1 76  0  0
##  tn
##  64
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 41.2 60.975  71.375  75.00 74.53000  78.550 85.905 96.0 7.564492   0
##        1 74.6 74.975  76.375  78.25 79.62237  82.325 86.350 96.0 4.174002   0
##        2 41.2 58.890  65.275  71.00 68.48281  72.825 73.785 74.4 6.089141   0
## P-value: 1.704684e-22
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MRH_D_ 
## Method: minimize_metric 
## Predictor: MRH_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    74    66
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##                62                      0   1           1           1 74  0  0
##  tn
##  66
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall   43 48.730   57.95  62.55 61.90143  66.950 72.265 82.8 7.396003   0
##        1   62 62.595   64.50  66.90 67.42703  69.200 74.290 82.8 3.976241   0
##        2   43 46.125   52.10  56.75 55.70606  59.975 61.500 61.7 5.048408   0
## P-value: 6.929898e-30
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MRH_S_ 
## Method: minimize_metric 
## Predictor: MRH_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    74    66
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              61.9                      0   1           1           1 74  0  0
##  tn
##  66
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 43.2 47.570  57.875  62.65 61.57571  66.700 71.620 80.4 7.248953   0
##        1 61.9 62.665  64.625  66.55 67.04054  69.075 73.065 80.4 3.627668   0
##        2 43.2 46.200  52.100  57.05 55.44848  59.675 61.200 61.5 5.046865   0
## P-value: 5.762605e-30
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MRB_D_ 
## Method: minimize_metric 
## Predictor: MRB_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    66    74
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              44.1                      0   1           1           1 66  0  0
##  tn
##  74
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 33.3 34.990  40.000  43.65 44.00714  46.425 53.065 73.8 6.733826   0
##        1 44.1 44.325  45.225  46.70 48.90758  49.950 66.075 73.8 6.365975   0
##        2 33.3 33.865  37.225  40.05 39.63649  41.900 43.605 43.9 3.029923   0
## P-value: 6.044914e-18
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MRB_S_ 
## Method: minimize_metric 
## Predictor: MRB_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    56    84
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              44.8                      0   1           1           1 56  0  0
##  tn
##  84
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.   95% Max.       SD NAs
##  Overall 33.7 35.295  40.775  43.80 44.71786  46.625 58.75 94.1 8.350137   0
##        1 44.8 44.875  45.875  47.50 51.11607  50.450 70.30 94.1 9.575799   0
##        2 33.7 34.660  37.800  41.15 40.45238  42.900 44.30 44.7 3.157835   0
## P-value: 2.940462e-11
## Warning in mean.default(data[[var]], na.rm = TRUE): argument is not numeric or
## logical: returning NA
## 
## Variabel MrB_D_ tidak memiliki dua kelas setelah pembagian berdasarkan median.
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MrB_S_ 
## Method: minimize_metric 
## Predictor: MrB_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    64    76
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              33.4                      0   1           1           1 64  0  0
##  tn
##  76
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 22.6 28.000    30.6  32.90 33.33000  35.525 40.500 45.4 4.078893   0
##        1 33.4 33.415    34.1  35.85 36.78906  38.900 41.665 45.4 3.015732   0
##        2 22.6 27.000    29.2  30.80 30.41711  31.900 33.025 33.3 2.094462   0
## P-value: 1.074643e-26
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MBH_D_ 
## Method: minimize_metric 
## Predictor: MBH_D_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    80    60
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              29.8                      0   1           1           1 80  0  0
##  tn
##  60
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 20.4 23.600  27.700  30.20 29.74357  32.325 35.105 39.4 3.642203   0
##        1 29.8 29.895  30.750  31.70 32.28875  33.225 36.305 39.4 1.949034   0
##        2 20.4 21.900  24.475  26.85 26.35000  28.300 29.215 29.6 2.381461   0
## P-value: 3.382794e-30
## Assuming the positive class is 1
## Assuming the positive class has higher x values

## 
## Analisis untuk variabel: MBH_S_ 
## Method: minimize_metric 
## Predictor: MBH_S_ 
## Outcome: class 
## Direction: >= 
## 
##  AUC   n n_pos n_neg
##    1 140    74    66
## 
##  optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
##              29.8                      0   1           1           1 74  0  0
##  tn
##  66
## 
## Predictor summary: 
##     Data Min.     5% 1st Qu. Median     Mean 3rd Qu.    95% Max.       SD NAs
##  Overall 20.2 23.475  27.850  29.90 29.70143  32.825 34.605 36.4 3.568628   0
##        1 29.8 29.900  31.050  32.50 32.44865  33.750 35.335 36.4 1.675467   0
##        2 20.2 22.075  24.825  27.45 26.62121  28.775 29.200 29.4 2.419696   0

## P-value: 1.02997e-31
# Load necessary libraries
library(ggplot2)
library(readxl)
library(dplyr)
library(rlang)
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
# Load the dataset
file_path <- "/home/taufiq/Documents/freelancer/kak qardawi/project baru/data/data.xlsx" # Adjust the path as needed
data <- read_excel(file_path)

# Select relevant columns
selected_columns <- c("Usia", "MRB(D)", "MRB(S)", "MRH(D)", "MRH(S)", "MrB(D)", "MBH(S)", "MrB(S)")

# Convert all selected columns to numeric, handling non-numeric values as NA
data[selected_columns[-1]] <- lapply(data[selected_columns[-1]], function(x) as.numeric(as.character(x)))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Create individual line plots for each variable
for (variable in selected_columns[-1]) {
  p <- ggplot(data, aes(x = Usia, y = !!sym(variable))) +
    geom_line(color = "blue") +
    labs(title = paste("Line Plot of", variable, "by Age"),
         x = "Age",
         y = variable) +
    theme_minimal()
  
  # Save the plot
  ggsave(filename = paste0("plot_", gsub("[()]", "", variable), ".png"), plot = p, width = 8, height = 6)
}