# Memuat library yang dibutuhkan
library(readxl)
library(car)
## Loading required package: carData
library(stats)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(rlang)
# Memuat data dari file excel
file_path <- "/home/taufiq/Documents/freelancer/kak qardawi/project baru/baru/data/data_fix 2.xlsx"
data <- read_excel(file_path, sheet = "Sheet1")
# Mengganti tanda kurung dalam nama kolom
colnames(data) <- gsub("\\(|\\)", "_", colnames(data))
# Memisahkan data berdasarkan jenis kelamin
data_perempuan <- data %>% filter(Jenis_Kelamin == "Perempuan")
data_laki_laki <- data %>% filter(Jenis_Kelamin == "Laki-laki")
# Tentukan variabel dependen dan independen
dependent_var <- "Usia" # Ganti dengan variabel dependen yang sesuai
# Fungsi untuk menghapus outlier
remove_outliers <- function(df, var) {
df <- df %>% mutate_at(vars(var), as.numeric)
Q1 <- quantile(df[[var]], 0.25, na.rm = TRUE)
Q3 <- quantile(df[[var]], 0.75, na.rm = TRUE)
IQR <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR
df <- df %>% filter(df[[var]] >= lower_bound & df[[var]] <= upper_bound)
return(df)
}
# Menghapus outlier dari data
clean_data_perempuan <- data_perempuan
clean_data_laki_laki <- data_laki_laki
desc_vars_perempuan <- colnames(data_perempuan)[3:ncol(data_perempuan)]
desc_vars_laki_laki <- colnames(data_laki_laki)[3:ncol(data_laki_laki)]
for (var in desc_vars_perempuan) {
clean_data_perempuan <- remove_outliers(clean_data_perempuan, var)
}
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(var)
##
## # Now:
## data %>% select(all_of(var))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
for (var in desc_vars_laki_laki) {
clean_data_laki_laki <- remove_outliers(clean_data_laki_laki, var)
}
# Memastikan semua variabel independen adalah numerik
numeric_data_perempuan <- clean_data_perempuan %>% mutate_at(vars(desc_vars_perempuan), as.numeric)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(desc_vars_perempuan)
##
## # Now:
## data %>% select(all_of(desc_vars_perempuan))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
numeric_data_laki_laki <- clean_data_laki_laki %>% mutate_at(vars(desc_vars_laki_laki), as.numeric)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(desc_vars_laki_laki)
##
## # Now:
## data %>% select(all_of(desc_vars_laki_laki))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Melakukan uji ANOVA dan Kruskal-Wallis
perform_tests <- function(dependent_var, independent_var, data) {
shapiro_test <- shapiro.test(data[[dependent_var]])
cat("\nShapiro-Wilk normality test for", dependent_var, "\n")
print(shapiro_test)
if (shapiro_test$p.value > 0.001) {
homogeneity_test <- bartlett.test(as.formula(paste(dependent_var, "~", independent_var)), data = data)
cat("\nBartlett test for homogeneity of variances for", dependent_var, "and", independent_var, "\n")
} else {
# Menambahkan pengecekan untuk menghindari error
if (length(unique(data[[independent_var]])) > 1) {
homogeneity_test <- fligner.test(as.formula(paste(dependent_var, "~", independent_var)), data = data)
cat("\nFligner-Killeen test for homogeneity of variances for", dependent_var, "and", independent_var, "\n")
} else {
cat("\nFligner-Killeen test cannot be performed because all observations are in the same group for", independent_var, "\n")
homogeneity_test <- list(p.value = NA)
}
}
print(homogeneity_test)
result <- list()
if (shapiro_test$p.value > 0.001 & homogeneity_test$p.value > 0.001) {
anova_result <- aov(as.formula(paste(dependent_var, "~", independent_var)), data = data)
result$anova <- summary(anova_result)
cat("\nANOVA result for", dependent_var, "and", independent_var, "\n")
print(result$anova)
} else {
# Menambahkan pengecekan untuk menghindari error
if (length(unique(data[[independent_var]])) > 1) {
kruskal_test <- kruskal.test(as.formula(paste(dependent_var, "~", independent_var)), data = data)
result$kruskal <- kruskal_test
cat("\nKruskal-Wallis test result for", dependent_var, "and", independent_var, "\n")
print(result$kruskal)
} else {
cat("\nKruskal-Wallis test cannot be performed because all observations are in the same group for", independent_var, "\n")
result$kruskal <- list(p.value = NA)
}
}
return(result)
}
results_perempuan <- list()
for (independent_var in colnames(numeric_data_perempuan[, -1])) {
results_perempuan[[independent_var]] <- perform_tests(dependent_var, independent_var, numeric_data_perempuan)
}
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test cannot be performed because all observations are in the same group for Jenis_Kelamin
## $p.value
## [1] NA
##
##
## Kruskal-Wallis test cannot be performed because all observations are in the same group for Jenis_Kelamin
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and BB1
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by BB1
## Fligner-Killeen:med chi-squared = 43.762, df = 53, p-value = 0.8132
##
##
## Kruskal-Wallis test result for Usia and BB1
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB1
## Kruskal-Wallis chi-squared = 58.644, df = 53, p-value = 0.2762
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and BB2
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by BB2
## Fligner-Killeen:med chi-squared = 65, df = 54, p-value = 0.1452
##
##
## Kruskal-Wallis test result for Usia and BB2
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB2
## Kruskal-Wallis chi-squared = 57.929, df = 54, p-value = 0.3325
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and BB3
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by BB3
## Fligner-Killeen:med chi-squared = 58.176, df = 54, p-value = 0.3243
##
##
## Kruskal-Wallis test result for Usia and BB3
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB3
## Kruskal-Wallis chi-squared = 58.955, df = 54, p-value = 0.2992
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MA_D_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MA_D_
## Fligner-Killeen:med chi-squared = 48.142, df = 58, p-value = 0.8186
##
##
## Kruskal-Wallis test result for Usia and MA_D_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MA_D_
## Kruskal-Wallis chi-squared = 63.921, df = 58, p-value = 0.2764
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MA_S_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MA_S_
## Fligner-Killeen:med chi-squared = 65, df = 54, p-value = 0.1452
##
##
## Kruskal-Wallis test result for Usia and MA_S_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MA_S_
## Kruskal-Wallis chi-squared = 59.733, df = 54, p-value = 0.2753
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and RL_D_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by RL_D_
## Fligner-Killeen:med chi-squared = 55.216, df = 51, p-value = 0.3184
##
##
## Kruskal-Wallis test result for Usia and RL_D_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by RL_D_
## Kruskal-Wallis chi-squared = 56.058, df = 51, p-value = 0.2909
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MRB_D_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MRB_D_
## Fligner-Killeen:med chi-squared = 42.959, df = 43, p-value = 0.4731
##
##
## Kruskal-Wallis test result for Usia and MRB_D_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MRB_D_
## Kruskal-Wallis chi-squared = 46.601, df = 43, p-value = 0.3265
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MRB_S_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MRB_S_
## Fligner-Killeen:med chi-squared = 50.984, df = 48, p-value = 0.3571
##
##
## Kruskal-Wallis test result for Usia and MRB_S_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MRB_S_
## Kruskal-Wallis chi-squared = 51.713, df = 48, p-value = 0.3309
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MrB_D_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MrB_D_
## Fligner-Killeen:med chi-squared = 32.186, df = 39, p-value = 0.7719
##
##
## Kruskal-Wallis test result for Usia and MrB_D_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MrB_D_
## Kruskal-Wallis chi-squared = 47.323, df = 39, p-value = 0.1693
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90263, p-value = 7.916e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MrB_S_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MrB_S_
## Fligner-Killeen:med chi-squared = 36.595, df = 43, p-value = 0.7439
##
##
## Kruskal-Wallis test result for Usia and MrB_S_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MrB_S_
## Kruskal-Wallis chi-squared = 43.447, df = 43, p-value = 0.4523
for (independent_var in names(results_perempuan)) {
cat("\nHasil uji untuk variabel independen (Perempuan):", independent_var, "\n")
print(results_perempuan[[independent_var]])
}
##
## Hasil uji untuk variabel independen (Perempuan): Jenis_Kelamin
## $kruskal
## $kruskal$p.value
## [1] NA
##
##
##
## Hasil uji untuk variabel independen (Perempuan): BB1
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB1
## Kruskal-Wallis chi-squared = 58.644, df = 53, p-value = 0.2762
##
##
##
## Hasil uji untuk variabel independen (Perempuan): BB2
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB2
## Kruskal-Wallis chi-squared = 57.929, df = 54, p-value = 0.3325
##
##
##
## Hasil uji untuk variabel independen (Perempuan): BB3
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB3
## Kruskal-Wallis chi-squared = 58.955, df = 54, p-value = 0.2992
##
##
##
## Hasil uji untuk variabel independen (Perempuan): MA_D_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MA_D_
## Kruskal-Wallis chi-squared = 63.921, df = 58, p-value = 0.2764
##
##
##
## Hasil uji untuk variabel independen (Perempuan): MA_S_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MA_S_
## Kruskal-Wallis chi-squared = 59.733, df = 54, p-value = 0.2753
##
##
##
## Hasil uji untuk variabel independen (Perempuan): RL_D_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by RL_D_
## Kruskal-Wallis chi-squared = 56.058, df = 51, p-value = 0.2909
##
##
##
## Hasil uji untuk variabel independen (Perempuan): MRB_D_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MRB_D_
## Kruskal-Wallis chi-squared = 46.601, df = 43, p-value = 0.3265
##
##
##
## Hasil uji untuk variabel independen (Perempuan): MRB_S_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MRB_S_
## Kruskal-Wallis chi-squared = 51.713, df = 48, p-value = 0.3309
##
##
##
## Hasil uji untuk variabel independen (Perempuan): MrB_D_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MrB_D_
## Kruskal-Wallis chi-squared = 47.323, df = 39, p-value = 0.1693
##
##
##
## Hasil uji untuk variabel independen (Perempuan): MrB_S_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MrB_S_
## Kruskal-Wallis chi-squared = 43.447, df = 43, p-value = 0.4523
results_laki_laki <- list()
for (independent_var in colnames(numeric_data_laki_laki[, -1])) {
results_laki_laki[[independent_var]] <- perform_tests(dependent_var, independent_var, numeric_data_laki_laki)
}
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test cannot be performed because all observations are in the same group for Jenis_Kelamin
## $p.value
## [1] NA
##
##
## Kruskal-Wallis test cannot be performed because all observations are in the same group for Jenis_Kelamin
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and BB1
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by BB1
## Fligner-Killeen:med chi-squared = 61.07, df = 55, p-value = 0.267
##
##
## Kruskal-Wallis test result for Usia and BB1
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB1
## Kruskal-Wallis chi-squared = 55.688, df = 55, p-value = 0.4487
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and BB2
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by BB2
## Fligner-Killeen:med chi-squared = 50.02, df = 53, p-value = 0.5909
##
##
## Kruskal-Wallis test result for Usia and BB2
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB2
## Kruskal-Wallis chi-squared = 59.291, df = 53, p-value = 0.257
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and BB3
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by BB3
## Fligner-Killeen:med chi-squared = 49.249, df = 53, p-value = 0.621
##
##
## Kruskal-Wallis test result for Usia and BB3
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB3
## Kruskal-Wallis chi-squared = 54.92, df = 53, p-value = 0.4017
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MA_D_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MA_D_
## Fligner-Killeen:med chi-squared = 65, df = 56, p-value = 0.1919
##
##
## Kruskal-Wallis test result for Usia and MA_D_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MA_D_
## Kruskal-Wallis chi-squared = 61.813, df = 56, p-value = 0.2763
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MA_S_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MA_S_
## Fligner-Killeen:med chi-squared = 48.287, df = 55, p-value = 0.727
##
##
## Kruskal-Wallis test result for Usia and MA_S_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MA_S_
## Kruskal-Wallis chi-squared = 55.319, df = 55, p-value = 0.4626
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and RL_D_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by RL_D_
## Fligner-Killeen:med chi-squared = 65, df = 55, p-value = 0.1676
##
##
## Kruskal-Wallis test result for Usia and RL_D_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by RL_D_
## Kruskal-Wallis chi-squared = 58.488, df = 55, p-value = 0.3486
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MRB_D_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MRB_D_
## Fligner-Killeen:med chi-squared = 55.664, df = 48, p-value = 0.2086
##
##
## Kruskal-Wallis test result for Usia and MRB_D_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MRB_D_
## Kruskal-Wallis chi-squared = 54.526, df = 48, p-value = 0.2403
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MRB_S_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MRB_S_
## Fligner-Killeen:med chi-squared = 47.11, df = 50, p-value = 0.5901
##
##
## Kruskal-Wallis test result for Usia and MRB_S_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MRB_S_
## Kruskal-Wallis chi-squared = 56.306, df = 50, p-value = 0.2508
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MrB_D_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MrB_D_
## Fligner-Killeen:med chi-squared = 54.334, df = 49, p-value = 0.2785
##
##
## Kruskal-Wallis test result for Usia and MrB_D_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MrB_D_
## Kruskal-Wallis chi-squared = 46.543, df = 49, p-value = 0.5733
##
##
## Shapiro-Wilk normality test for Usia
##
## Shapiro-Wilk normality test
##
## data: data[[dependent_var]]
## W = 0.90313, p-value = 8.274e-05
##
##
## Fligner-Killeen test for homogeneity of variances for Usia and MrB_S_
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Usia by MrB_S_
## Fligner-Killeen:med chi-squared = 60.831, df = 43, p-value = 0.03779
##
##
## Kruskal-Wallis test result for Usia and MrB_S_
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MrB_S_
## Kruskal-Wallis chi-squared = 54.123, df = 43, p-value = 0.119
for (independent_var in names(results_laki_laki)) {
cat("\nHasil uji untuk variabel independen (Laki-laki):", independent_var, "\n")
print(results_laki_laki[[independent_var]])
}
##
## Hasil uji untuk variabel independen (Laki-laki): Jenis_Kelamin
## $kruskal
## $kruskal$p.value
## [1] NA
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): BB1
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB1
## Kruskal-Wallis chi-squared = 55.688, df = 55, p-value = 0.4487
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): BB2
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB2
## Kruskal-Wallis chi-squared = 59.291, df = 53, p-value = 0.257
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): BB3
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by BB3
## Kruskal-Wallis chi-squared = 54.92, df = 53, p-value = 0.4017
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): MA_D_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MA_D_
## Kruskal-Wallis chi-squared = 61.813, df = 56, p-value = 0.2763
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): MA_S_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MA_S_
## Kruskal-Wallis chi-squared = 55.319, df = 55, p-value = 0.4626
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): RL_D_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by RL_D_
## Kruskal-Wallis chi-squared = 58.488, df = 55, p-value = 0.3486
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): MRB_D_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MRB_D_
## Kruskal-Wallis chi-squared = 54.526, df = 48, p-value = 0.2403
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): MRB_S_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MRB_S_
## Kruskal-Wallis chi-squared = 56.306, df = 50, p-value = 0.2508
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): MrB_D_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MrB_D_
## Kruskal-Wallis chi-squared = 46.543, df = 49, p-value = 0.5733
##
##
##
## Hasil uji untuk variabel independen (Laki-laki): MrB_S_
## $kruskal
##
## Kruskal-Wallis rank sum test
##
## data: Usia by MrB_S_
## Kruskal-Wallis chi-squared = 54.123, df = 43, p-value = 0.119
# Uji multikolinearitas sebelum DFA
multicollinearity_test <- function(data, dependent_var, independent_vars) {
formula <- as.formula(paste(dependent_var, "~", paste(independent_vars, collapse = "+")))
model <- lm(formula, data = data)
vif_values <- vif(model)
return(vif_values)
}
vif_results_perempuan_before <- multicollinearity_test(clean_data_perempuan, dependent_var, desc_vars_perempuan)
cat("\nUji Multikolinearitas Sebelum DFA (Perempuan):\n")
##
## Uji Multikolinearitas Sebelum DFA (Perempuan):
print(vif_results_perempuan_before)
## BB1 BB2 BB3 MA_D_ MA_S_ RL_D_ MRB_D_ MRB_S_
## 1.768976 1.328725 1.536765 3.696118 3.900903 1.535011 7.294057 6.772663
## MrB_D_ MrB_S_
## 3.979985 2.853235
vif_results_laki_laki_before <- multicollinearity_test(clean_data_laki_laki, dependent_var, desc_vars_laki_laki)
cat("\nUji Multikolinearitas Sebelum DFA (Laki-laki):\n")
##
## Uji Multikolinearitas Sebelum DFA (Laki-laki):
print(vif_results_laki_laki_before)
## BB1 BB2 BB3 MA_D_ MA_S_ RL_D_ MRB_D_ MRB_S_
## 1.899131 1.535127 1.542474 8.270257 7.871134 1.680170 9.862466 11.007594
## MrB_D_ MrB_S_
## 15.243395 16.162875
# Melakukan DFA
d_data_perempuan <- clean_data_perempuan[, c(dependent_var, desc_vars_perempuan)]
d_data_perempuan[[dependent_var]] <- as.factor(d_data_perempuan[[dependent_var]])
d_data_laki_laki <- clean_data_laki_laki[, c(dependent_var, desc_vars_laki_laki)]
d_data_laki_laki[[dependent_var]] <- as.factor(d_data_laki_laki[[dependent_var]])
# DFA untuk Perempuan
discriminant_result_perempuan <- lda(as.formula(paste(dependent_var, "~", paste(desc_vars_perempuan, collapse = "+"))), data = d_data_perempuan)
print(discriminant_result_perempuan)
## Call:
## lda(as.formula(paste(dependent_var, "~", paste(desc_vars_perempuan,
## collapse = "+"))), data = d_data_perempuan)
##
## Prior probabilities of groups:
## 1 2 3 4 5 6 7
## 0.15151515 0.07575758 0.18181818 0.07575758 0.16666667 0.18181818 0.16666667
##
## Group means:
## BB1 BB2 BB3 MA_D_ MA_S_ RL_D_ MRB_D_ MRB_S_
## 1 113.8400 93.52000 92.94000 129.8900 130.6900 51.74000 39.27000 39.36000
## 2 116.8200 97.46000 92.40000 124.0600 125.4800 53.92000 40.10000 40.48000
## 3 119.9333 96.30833 93.03333 125.6750 124.5417 55.20833 39.85000 40.48333
## 4 118.1000 98.62000 98.22000 125.6200 123.2600 55.48000 43.06000 42.58000
## 5 119.1009 93.58182 94.21818 125.6455 125.3273 52.06364 40.91818 40.67273
## 6 120.6958 89.60833 96.24167 126.9250 126.6333 55.14167 43.45833 43.74583
## 7 123.6909 98.39091 95.64545 126.6182 125.9727 56.65455 42.55455 43.41818
## MrB_D_ MrB_S_
## 1 29.71000 29.69000
## 2 30.16000 31.46000
## 3 31.05000 32.11667
## 4 31.54000 31.22000
## 5 30.39091 30.03636
## 6 32.13333 32.22500
## 7 32.49091 32.05455
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3 LD4 LD5 LD6
## BB1 -0.05426451 -0.11727839 0.06678379 0.025659862 0.17785353 0.03649324
## BB2 0.31130728 -0.09494906 -0.09337126 -0.009332782 -0.01204049 0.05388925
## BB3 -0.03681862 0.11031676 -0.11435929 0.137715486 -0.05835721 -0.06406271
## MA_D_ 0.01265317 -0.09002850 0.03127370 0.008884326 0.05463177 -0.22170623
## MA_S_ 0.01248826 0.07900216 -0.07420888 -0.189772633 -0.06499632 0.20659761
## RL_D_ -0.01513692 -0.05579317 0.02166180 -0.105578356 -0.12759055 -0.08394280
## MRB_D_ 0.25636314 0.19726185 -0.15327327 0.261933134 -0.05269248 0.20341553
## MRB_S_ -0.34885955 -0.35843151 -0.09039918 -0.177626577 -0.03519967 0.10288189
## MrB_D_ -0.15798115 -0.13491129 -0.33864133 -0.541943422 0.10429766 -0.38757592
## MrB_S_ 0.09115522 -0.06682421 0.54623458 0.291698707 -0.30117483 0.16419867
##
## Proportion of trace:
## LD1 LD2 LD3 LD4 LD5 LD6
## 0.3984 0.3147 0.1339 0.0841 0.0477 0.0212
d_scores_perempuan <- predict(discriminant_result_perempuan)$x
d_scores_perempuan <- as.data.frame(d_scores_perempuan)
colnames(d_scores_perempuan) <- paste0("LD", 1:ncol(d_scores_perempuan))
contributions_perempuan <- discriminant_result_perempuan$scaling
top_contributors_perempuan <- apply(contributions_perempuan, 2, function(x) desc_vars_perempuan[which.max(abs(x))])
contributions_df_perempuan <- data.frame(Discriminant = colnames(contributions_perempuan), Top_Contributor = top_contributors_perempuan)
print(contributions_df_perempuan)
## Discriminant Top_Contributor
## LD1 LD1 MRB_S_
## LD2 LD2 MRB_S_
## LD3 LD3 MrB_S_
## LD4 LD4 MrB_D_
## LD5 LD5 MrB_S_
## LD6 LD6 MrB_D_
# DFA untuk Laki-laki
discriminant_result_laki_laki <- lda(as.formula(paste(dependent_var, "~", paste(desc_vars_laki_laki, collapse = "+"))), data = d_data_laki_laki)
print(discriminant_result_laki_laki)
## Call:
## lda(as.formula(paste(dependent_var, "~", paste(desc_vars_laki_laki,
## collapse = "+"))), data = d_data_laki_laki)
##
## Prior probabilities of groups:
## 1 2 3 4 5 6 7
## 0.16666667 0.16666667 0.21212121 0.04545455 0.15151515 0.18181818 0.07575758
##
## Group means:
## BB1 BB2 BB3 MA_D_ MA_S_ RL_D_ MRB_D_ MRB_S_
## 1 121.9000 96.80909 97.21818 127.0909 126.4909 58.42727 42.61818 42.31818
## 2 121.7364 98.21818 99.49091 127.3636 127.8273 57.36364 42.12727 42.19091
## 3 126.7571 97.89286 100.32857 125.3571 125.1714 60.82143 43.41429 43.60714
## 4 128.0000 99.46667 103.16667 124.5000 128.3000 59.13333 46.36667 46.13333
## 5 126.5030 100.25000 102.99000 123.3200 123.3000 60.98000 47.49000 48.29000
## 6 125.1775 100.93333 105.94167 127.8667 128.1583 62.55833 44.71667 44.95000
## 7 124.0000 100.06000 102.72000 126.5000 127.7800 60.12000 46.70000 46.68000
## MrB_D_ MrB_S_
## 1 32.72727 33.13636
## 2 31.50000 31.49091
## 3 33.03571 33.50714
## 4 36.53333 36.13333
## 5 37.68000 38.09000
## 6 32.34167 32.79167
## 7 34.04000 34.00000
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3 LD4 LD5
## BB1 0.11974695 0.08732943 0.11688490 0.155901992 -0.08725077
## BB2 -0.15327393 -0.09207786 -0.08955682 -0.056845852 -0.02171269
## BB3 -0.06616421 -0.06856035 0.01777973 0.008832351 0.08992307
## MA_D_ 0.05426782 0.06064241 0.23056016 -0.228706904 -0.04430043
## MA_S_ -0.05884201 -0.03095953 -0.21468117 0.217429794 0.14692397
## RL_D_ -0.04590780 -0.01169595 0.16192294 -0.020228706 0.08823403
## MRB_D_ 0.02694144 -0.20698367 -0.19771751 0.137555553 0.42764234
## MRB_S_ -0.44282691 0.12939325 0.15648010 -0.163114111 -0.71076957
## MrB_D_ -0.11842621 0.13918023 -0.20588805 0.093518903 -0.30184625
## MrB_S_ 0.28633351 0.24676761 0.13470032 -0.164216231 0.66732866
## LD6
## BB1 -0.030793235
## BB2 0.071429806
## BB3 0.073643985
## MA_D_ -0.009500776
## MA_S_ 0.014390908
## RL_D_ 0.011985416
## MRB_D_ -0.567215378
## MRB_S_ 0.220788776
## MrB_D_ 0.406952001
## MrB_S_ -0.178248595
##
## Proportion of trace:
## LD1 LD2 LD3 LD4 LD5 LD6
## 0.4702 0.2798 0.1385 0.0656 0.0299 0.0160
d_scores_laki_laki <- predict(discriminant_result_laki_laki)$x
d_scores_laki_laki <- as.data.frame(d_scores_laki_laki)
colnames(d_scores_laki_laki) <- paste0("LD", 1:ncol(d_scores_laki_laki))
contributions_laki_laki <- discriminant_result_laki_laki$scaling
top_contributors_laki_laki <- apply(contributions_laki_laki, 2, function(x) desc_vars_laki_laki[which.max(abs(x))])
contributions_df_laki_laki <- data.frame(Discriminant = colnames(contributions_laki_laki), Top_Contributor = top_contributors_laki_laki)
print(contributions_df_laki_laki)
## Discriminant Top_Contributor
## LD1 LD1 MRB_S_
## LD2 LD2 MrB_S_
## LD3 LD3 MA_D_
## LD4 LD4 MA_D_
## LD5 LD5 MRB_S_
## LD6 LD6 MRB_D_
# Uji multikolinearitas setelah DFA
vif_results_perempuan <- multicollinearity_test(cbind(clean_data_perempuan[dependent_var], d_scores_perempuan), dependent_var, colnames(d_scores_perempuan))
cat("\nUji Multikolinearitas Setelah DFA (Perempuan):\n")
##
## Uji Multikolinearitas Setelah DFA (Perempuan):
print(vif_results_perempuan)
## LD1 LD2 LD3 LD4 LD5 LD6
## 1 1 1 1 1 1
vif_results_laki_laki <- multicollinearity_test(cbind(clean_data_laki_laki[dependent_var], d_scores_laki_laki), dependent_var, colnames(d_scores_laki_laki))
cat("\nUji Multikolinearitas Setelah DFA (Laki-laki):\n")
##
## Uji Multikolinearitas Setelah DFA (Laki-laki):
print(vif_results_laki_laki)
## LD1 LD2 LD3 LD4 LD5 LD6
## 1 1 1 1 1 1
# Membuat boxplot data asli dan bersih untuk Perempuan
melted_data_perempuan_original <- melt(data_perempuan[, desc_vars_perempuan])
## No id variables; using all as measure variables
melted_data_perempuan_clean <- melt(clean_data_perempuan[, desc_vars_perempuan])
## No id variables; using all as measure variables
boxplot_plot_perempuan_original <- ggplot(melted_data_perempuan_original, aes(x = variable, y = value)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Boxplot untuk Variabel Independen (Perempuan - Data Asli)", x = "Variabel", y = "Nilai")
boxplot_plot_perempuan_clean <- ggplot(melted_data_perempuan_clean, aes(x = variable, y = value)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Boxplot untuk Variabel Independen (Perempuan - Data Bersih)", x = "Variabel", y = "Nilai")
print(boxplot_plot_perempuan_original)

print(boxplot_plot_perempuan_clean)

# Membuat boxplot data asli dan bersih untuk Laki-laki
melted_data_laki_laki_original <- melt(data_laki_laki[, desc_vars_laki_laki])
## No id variables; using all as measure variables
melted_data_laki_laki_clean <- melt(clean_data_laki_laki[, desc_vars_laki_laki])
## No id variables; using all as measure variables
boxplot_plot_laki_laki_original <- ggplot(melted_data_laki_laki_original, aes(x = variable, y = value)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Boxplot untuk Variabel Independen (Laki-laki - Data Asli)", x = "Variabel", y = "Nilai")
boxplot_plot_laki_laki_clean <- ggplot(melted_data_laki_laki_clean, aes(x = variable, y = value)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Boxplot untuk Variabel Independen (Laki-laki - Data Bersih)", x = "Variabel", y = "Nilai")
print(boxplot_plot_laki_laki_original)

print(boxplot_plot_laki_laki_clean)

# Menghitung statistik deskriptif untuk semua variabel DFA
desc_stats_perempuan <- describe(d_scores_perempuan)
cat("\nStatistika Deskriptif (Perempuan):\n")
##
## Statistika Deskriptif (Perempuan):
print(desc_stats_perempuan)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## LD1 1 66 0 1.41 0.01 0.05 1.56 -3.00 2.30 5.30 -0.32 -0.93 0.17
## LD2 2 66 0 1.33 -0.08 -0.04 1.06 -2.86 3.40 6.27 0.26 0.00 0.16
## LD3 3 66 0 1.13 0.00 0.02 0.86 -3.70 3.58 7.28 -0.13 1.66 0.14
## LD4 4 66 0 1.07 -0.08 0.01 1.19 -2.62 1.92 4.55 -0.07 -0.83 0.13
## LD5 5 66 0 1.02 -0.07 -0.07 0.80 -2.19 3.13 5.31 0.69 0.63 0.13
## LD6 6 66 0 0.98 -0.08 -0.02 0.71 -2.41 3.02 5.43 0.35 0.92 0.12
desc_stats_laki_laki <- describe(d_scores_laki_laki)
cat("\nStatistika Deskriptif (Laki-laki):\n")
##
## Statistika Deskriptif (Laki-laki):
print(desc_stats_laki_laki)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## LD1 1 66 0 1.35 0.06 0.00 1.36 -2.83 3.23 6.05 0.04 -0.56 0.17
## LD2 2 66 0 1.20 -0.07 -0.04 1.17 -2.48 2.59 5.07 0.26 -0.75 0.15
## LD3 3 66 0 1.08 0.12 0.08 1.19 -2.62 1.66 4.28 -0.49 -0.52 0.13
## LD4 4 66 0 1.02 -0.02 -0.04 1.21 -1.78 2.05 3.83 0.37 -0.80 0.13
## LD5 5 66 0 0.98 0.17 0.02 0.83 -2.51 2.25 4.76 -0.35 -0.21 0.12
## LD6 6 66 0 0.97 -0.07 -0.05 0.86 -2.07 3.42 5.49 0.70 1.11 0.12
# Mengubah variabel dependen menjadi faktor
clean_data_perempuan[[dependent_var]] <- as.factor(clean_data_perempuan[[dependent_var]])
clean_data_laki_laki[[dependent_var]] <- as.factor(clean_data_laki_laki[[dependent_var]])
# Analisis Regresi Logistik Ordinal untuk Perempuan
formula_str_perempuan <- paste(dependent_var, "~", paste(colnames(d_scores_perempuan), collapse = " + "))
data_model_perempuan <- cbind(clean_data_perempuan[dependent_var], d_scores_perempuan)
colnames(data_model_perempuan)[1] <- dependent_var # Pastikan nama kolom dependen sesuai
logistic_model_perempuan <- polr(as.formula(formula_str_perempuan), data = data_model_perempuan, Hess = TRUE)
summary(logistic_model_perempuan)
## Call:
## polr(formula = as.formula(formula_str_perempuan), data = data_model_perempuan,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## LD1 -0.57285 0.1680 -3.4108
## LD2 -1.03116 0.2190 -4.7076
## LD3 -0.52947 0.2313 -2.2888
## LD4 0.11661 0.2161 0.5395
## LD5 0.24926 0.2321 1.0739
## LD6 0.09965 0.2534 0.3932
##
## Intercepts:
## Value Std. Error t value
## 1|2 -2.5413 0.4486 -5.6647
## 2|3 -1.8164 0.3756 -4.8354
## 3|4 -0.5780 0.3049 -1.8956
## 4|5 -0.1435 0.2981 -0.4814
## 5|6 0.8989 0.3161 2.8441
## 6|7 2.3118 0.4055 5.7019
##
## Residual Deviance: 210.5472
## AIC: 234.5472
# Uji Likelihood Ratio untuk godness of fit (Perempuan)
null_model_perempuan <- polr(as.formula(paste(dependent_var, "~ 1")), data = cbind(clean_data_perempuan[dependent_var], d_scores_perempuan), Hess = TRUE)
lr_test_perempuan <- anova(null_model_perempuan, logistic_model_perempuan)
cat("\nUji Likelihood Ratio (Perempuan):\n")
##
## Uji Likelihood Ratio (Perempuan):
print(lr_test_perempuan)
## Likelihood ratio tests of ordinal regression models
##
## Response: Usia
## Model Resid. df Resid. Dev Test Df LR stat.
## 1 1 60 250.0111
## 2 LD1 + LD2 + LD3 + LD4 + LD5 + LD6 54 210.5472 1 vs 2 6 39.46383
## Pr(Chi)
## 1
## 2 5.804913e-07
# Uji signifikan parsial (wald) untuk Perempuan
coeftest_result_perempuan <- coeftest(logistic_model_perempuan)
cat("\nUji Signifikansi Parsial (Wald) (Perempuan):\n")
##
## Uji Signifikansi Parsial (Wald) (Perempuan):
print(coeftest_result_perempuan)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## LD1 -0.572849 0.167953 -3.4108 0.001233 **
## LD2 -1.031159 0.219043 -4.7076 1.788e-05 ***
## LD3 -0.529470 0.231334 -2.2888 0.026031 *
## LD4 0.116606 0.216127 0.5395 0.591744
## LD5 0.249260 0.232106 1.0739 0.287641
## LD6 0.099645 0.253400 0.3932 0.695695
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Odds ratio untuk Perempuan
odds_ratios_perempuan <- exp(coef(logistic_model_perempuan))
cat("\nOdds Ratios (Perempuan):\n")
##
## Odds Ratios (Perempuan):
print(odds_ratios_perempuan)
## LD1 LD2 LD3 LD4 LD5 LD6
## 0.5639168 0.3565933 0.5889169 1.1236764 1.2830755 1.1047789
# Analisis Regresi Logistik Ordinal untuk Laki-laki
formula_str_laki_laki <- paste(dependent_var, "~", paste(colnames(d_scores_laki_laki), collapse = " + "))
data_model_laki_laki <- cbind(clean_data_laki_laki[dependent_var], d_scores_laki_laki)
colnames(data_model_laki_laki)[1] <- dependent_var # Pastikan nama kolom dependen sesuai
logistic_model_laki_laki <- polr(as.formula(formula_str_laki_laki), data = data_model_laki_laki, Hess = TRUE)
summary(logistic_model_laki_laki)
## Call:
## polr(formula = as.formula(formula_str_laki_laki), data = data_model_laki_laki,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## LD1 -1.12119 0.2055 -5.4559
## LD2 -0.14756 0.1832 -0.8054
## LD3 0.17969 0.1983 0.9062
## LD4 0.29687 0.2353 1.2619
## LD5 -0.09428 0.2245 -0.4200
## LD6 -0.19104 0.2369 -0.8066
##
## Intercepts:
## Value Std. Error t value
## 1|2 -2.2997 0.4067 -5.6544
## 2|3 -1.0903 0.3191 -3.4170
## 3|4 0.2090 0.2965 0.7048
## 4|5 0.5173 0.3026 1.7094
## 5|6 1.5720 0.3495 4.4984
## 6|7 3.4580 0.5651 6.1193
##
## Residual Deviance: 207.6091
## AIC: 231.6091
# Uji Likelihood Ratio untuk godness of fit (Laki-laki)
null_model_laki_laki <- polr(as.formula(paste(dependent_var, "~ 1")), data = cbind(clean_data_laki_laki[dependent_var], d_scores_laki_laki), Hess = TRUE)
lr_test_laki_laki <- anova(null_model_laki_laki, logistic_model_laki_laki)
cat("\nUji Likelihood Ratio (Laki-laki):\n")
##
## Uji Likelihood Ratio (Laki-laki):
print(lr_test_laki_laki)
## Likelihood ratio tests of ordinal regression models
##
## Response: Usia
## Model Resid. df Resid. Dev Test Df LR stat.
## 1 1 60 245.2579
## 2 LD1 + LD2 + LD3 + LD4 + LD5 + LD6 54 207.6091 1 vs 2 6 37.64879
## Pr(Chi)
## 1
## 2 1.315659e-06
# Uji signifikan parsial (wald) untuk Laki-laki
coeftest_result_laki_laki <- coeftest(logistic_model_laki_laki)
cat("\nUji Signifikansi Parsial (Wald) (Laki-laki):\n")
##
## Uji Signifikansi Parsial (Wald) (Laki-laki):
print(coeftest_result_laki_laki)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## LD1 -1.121186 0.205501 -5.4559 1.25e-06 ***
## LD2 -0.147557 0.183208 -0.8054 0.4241
## LD3 0.179690 0.198283 0.9062 0.3688
## LD4 0.296867 0.235261 1.2619 0.2124
## LD5 -0.094278 0.224451 -0.4200 0.6761
## LD6 -0.191039 0.236851 -0.8066 0.4234
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Odds ratio untuk Laki-laki
odds_ratios_laki_laki <- exp(coef(logistic_model_laki_laki))
cat("\nOdds Ratios (Laki-laki):\n")
##
## Odds Ratios (Laki-laki):
print(odds_ratios_laki_laki)
## LD1 LD2 LD3 LD4 LD5 LD6
## 0.3258932 0.8628135 1.1968467 1.3456365 0.9100294 0.8261002
# Prediksi dan Evaluasi Model untuk Perempuan
predictions_perempuan <- predict(logistic_model_perempuan, newdata = cbind(clean_data_perempuan[dependent_var], d_scores_perempuan), type = "class")
actual_perempuan <- clean_data_perempuan[[dependent_var]]
confusion_matrix_perempuan <- table(Prediction = predictions_perempuan, Actual = actual_perempuan)
cat("\nConfusion Matrix (Perempuan):\n")
##
## Confusion Matrix (Perempuan):
print(confusion_matrix_perempuan)
## Actual
## Prediction 1 2 3 4 5 6 7
## 1 6 2 2 1 1 0 0
## 2 0 0 0 0 0 0 0
## 3 1 3 5 2 3 1 0
## 4 0 0 0 0 0 0 0
## 5 1 0 3 0 4 1 1
## 6 2 0 2 1 3 7 5
## 7 0 0 0 1 0 3 5
accuracy_perempuan <- sum(diag(confusion_matrix_perempuan)) / sum(confusion_matrix_perempuan)
cat("\nAkurasi (Perempuan):\n")
##
## Akurasi (Perempuan):
print(accuracy_perempuan)
## [1] 0.4090909
# Prediksi dan Evaluasi Model untuk Laki-laki
predictions_laki_laki <- predict(logistic_model_laki_laki, newdata = cbind(clean_data_laki_laki[dependent_var], d_scores_laki_laki), type = "class")
actual_laki_laki <- clean_data_laki_laki[[dependent_var]]
confusion_matrix_laki_laki <- table(Prediction = predictions_laki_laki, Actual = actual_laki_laki)
cat("\nConfusion Matrix (Laki-laki):\n")
##
## Confusion Matrix (Laki-laki):
print(confusion_matrix_laki_laki)
## Actual
## Prediction 1 2 3 4 5 6 7
## 1 6 3 5 0 0 0 0
## 2 3 1 0 0 0 0 0
## 3 1 5 8 2 4 3 1
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 1 2 0
## 6 1 2 1 1 5 6 3
## 7 0 0 0 0 0 1 1
accuracy_laki_laki <- sum(diag(confusion_matrix_laki_laki)) / sum(confusion_matrix_laki_laki)
cat("\nAkurasi (Laki-laki):\n")
##
## Akurasi (Laki-laki):
print(accuracy_laki_laki)
## [1] 0.3484848
# Kesimpulan
# Menghubungkan hasil regresi dengan kontribusi variabel asli
contributions_df_perempuan <- data.frame(
Discriminant = colnames(d_scores_perempuan),
Variable = top_contributors_perempuan,
Estimate = coef(logistic_model_perempuan),
Std.Error = sqrt(diag(vcov(logistic_model_perempuan))),
z.value = coef(logistic_model_perempuan) / sqrt(diag(vcov(logistic_model_perempuan))),
p.value = 2 * (1 - pnorm(abs(coef(logistic_model_perempuan) / sqrt(diag(vcov(logistic_model_perempuan))))))
)
## Warning in data.frame(Discriminant = colnames(d_scores_perempuan), Variable =
## top_contributors_perempuan, : row names were found from a short variable and
## have been discarded
contributions_df_perempuan$Significance <- ifelse(contributions_df_perempuan$p.value < 0.001, "Signifikan", "Tidak Signifikan")
cat("\nKesimpulan (Perempuan):\n")
##
## Kesimpulan (Perempuan):
print(contributions_df_perempuan)
## Discriminant Variable Estimate Std.Error z.value p.value
## 1 LD1 MRB_S_ -0.57284853 0.1679528 -3.4107716 6.477934e-04
## 2 LD2 MRB_S_ -1.03115945 0.2190428 -4.7075703 2.506869e-06
## 3 LD3 MrB_S_ -0.52947022 0.2313338 -2.2887714 2.209264e-02
## 4 LD4 MrB_D_ 0.11660584 0.2161275 0.5395234 5.895257e-01
## 5 LD5 MrB_S_ 0.24925990 0.2321060 1.0739055 2.828651e-01
## 6 LD6 MrB_D_ 0.09964526 0.2533996 0.3932336 6.941470e-01
## 7 LD1 MRB_S_ -0.57284853 0.4486256 -1.2768966 2.016388e-01
## 8 LD2 MRB_S_ -1.03115945 0.3756340 -2.7451172 6.048930e-03
## 9 LD3 MrB_S_ -0.52947022 0.3049405 -1.7363065 8.250965e-02
## 10 LD4 MrB_D_ 0.11660584 0.2980551 0.3912224 6.956328e-01
## 11 LD5 MrB_S_ 0.24925990 0.3160656 0.7886333 4.303263e-01
## 12 LD6 MrB_D_ 0.09964526 0.4054512 0.2457639 8.058650e-01
## Significance
## 1 Signifikan
## 2 Signifikan
## 3 Tidak Signifikan
## 4 Tidak Signifikan
## 5 Tidak Signifikan
## 6 Tidak Signifikan
## 7 Tidak Signifikan
## 8 Tidak Signifikan
## 9 Tidak Signifikan
## 10 Tidak Signifikan
## 11 Tidak Signifikan
## 12 Tidak Signifikan
contributions_df_laki_laki <- data.frame(
Discriminant = colnames(d_scores_laki_laki),
Variable = top_contributors_laki_laki,
Estimate = coef(logistic_model_laki_laki),
Std.Error = sqrt(diag(vcov(logistic_model_laki_laki))),
z.value = coef(logistic_model_laki_laki) / sqrt(diag(vcov(logistic_model_laki_laki))),
p.value = 2 * (1 - pnorm(abs(coef(logistic_model_laki_laki) / sqrt(diag(vcov(logistic_model_laki_laki))))))
)
## Warning in data.frame(Discriminant = colnames(d_scores_laki_laki), Variable =
## top_contributors_laki_laki, : row names were found from a short variable and
## have been discarded
contributions_df_laki_laki$Significance <- ifelse(contributions_df_laki_laki$p.value < 0.001, "Signifikan", "Tidak Signifikan")
cat("\nKesimpulan (Laki-laki):\n")
##
## Kesimpulan (Laki-laki):
print(contributions_df_laki_laki)
## Discriminant Variable Estimate Std.Error z.value p.value
## 1 LD1 MRB_S_ -1.12118569 0.2055009 -5.4558670 4.873447e-08
## 2 LD2 MrB_S_ -0.14755668 0.1832084 -0.8054033 4.205870e-01
## 3 LD3 MA_D_ 0.17969037 0.1982828 0.9062329 3.648126e-01
## 4 LD4 MA_D_ 0.29686716 0.2352608 1.2618640 2.069977e-01
## 5 LD5 MRB_S_ -0.09427839 0.2244507 -0.4200405 6.744559e-01
## 6 LD6 MRB_D_ -0.19103922 0.2368507 -0.8065806 4.199082e-01
## 7 LD1 MRB_S_ -1.12118569 0.4067203 -2.7566505 5.839673e-03
## 8 LD2 MrB_S_ -0.14755668 0.3190840 -0.4624384 6.437670e-01
## 9 LD3 MA_D_ 0.17969037 0.2964832 0.6060728 5.444664e-01
## 10 LD4 MA_D_ 0.29686716 0.3026237 0.9809779 3.266036e-01
## 11 LD5 MRB_S_ -0.09427839 0.3494703 -0.2697751 7.873333e-01
## 12 LD6 MRB_D_ -0.19103922 0.5650897 -0.3380688 7.353113e-01
## Significance
## 1 Signifikan
## 2 Tidak Signifikan
## 3 Tidak Signifikan
## 4 Tidak Signifikan
## 5 Tidak Signifikan
## 6 Tidak Signifikan
## 7 Tidak Signifikan
## 8 Tidak Signifikan
## 9 Tidak Signifikan
## 10 Tidak Signifikan
## 11 Tidak Signifikan
## 12 Tidak Signifikan
# Menyediakan ringkasan dengan estimasi parameter untuk Perempuan
summary_table_perempuan <- summary(logistic_model_perempuan)
coefficients_perempuan <- summary_table_perempuan$coefficients[1:length(colnames(d_scores_perempuan)), ]
std_errors_perempuan <- coefficients_perempuan[, "Std. Error"]
z_values_perempuan <- coefficients_perempuan[, "t value"]
p_values_perempuan <- 2 * (1 - pnorm(abs(z_values_perempuan)))
# Menyediakan ringkasan dengan estimasi parameter untuk Laki-laki
summary_table_laki_laki <- summary(logistic_model_laki_laki)
coefficients_laki_laki <- summary_table_laki_laki$coefficients[1:length(colnames(d_scores_laki_laki)), ]
std_errors_laki_laki <- coefficients_laki_laki[, "Std. Error"]
z_values_laki_laki <- coefficients_laki_laki[, "t value"]
p_values_laki_laki <- 2 * (1 - pnorm(abs(z_values_laki_laki)))
# Sesuaikan dengan nama variabel asli
variable_names_perempuan <- top_contributors_perempuan
variable_names_laki_laki <- top_contributors_laki_laki
results_df_perempuan <- data.frame(
Discriminant = colnames(d_scores_perempuan),
Variable = variable_names_perempuan,
Estimate = coefficients_perempuan[, "Value"],
Std.Error = std_errors_perempuan,
z.value = z_values_perempuan,
p.value = p_values_perempuan
)
cat("\nEstimasi Parameter, Std. Error, dan P-value (Perempuan):\n")
##
## Estimasi Parameter, Std. Error, dan P-value (Perempuan):
print(results_df_perempuan)
## Discriminant Variable Estimate Std.Error z.value p.value
## LD1 LD1 MRB_S_ -0.57284853 0.1679528 -3.4107716 6.477934e-04
## LD2 LD2 MRB_S_ -1.03115945 0.2190428 -4.7075703 2.506869e-06
## LD3 LD3 MrB_S_ -0.52947022 0.2313338 -2.2887714 2.209264e-02
## LD4 LD4 MrB_D_ 0.11660584 0.2161275 0.5395234 5.895257e-01
## LD5 LD5 MrB_S_ 0.24925990 0.2321060 1.0739055 2.828651e-01
## LD6 LD6 MrB_D_ 0.09964526 0.2533996 0.3932336 6.941470e-01
results_df_laki_laki <- data.frame(
Discriminant = colnames(d_scores_laki_laki),
Variable = variable_names_laki_laki,
Estimate = coefficients_laki_laki[, "Value"],
Std.Error = std_errors_laki_laki,
z.value = z_values_laki_laki,
p.value = p_values_laki_laki
)
cat("\nEstimasi Parameter, Std. Error, dan P-value (Laki-laki):\n")
##
## Estimasi Parameter, Std. Error, dan P-value (Laki-laki):
print(results_df_laki_laki)
## Discriminant Variable Estimate Std.Error z.value p.value
## LD1 LD1 MRB_S_ -1.12118569 0.2055009 -5.4558670 4.873447e-08
## LD2 LD2 MrB_S_ -0.14755668 0.1832084 -0.8054033 4.205870e-01
## LD3 LD3 MA_D_ 0.17969037 0.1982828 0.9062329 3.648126e-01
## LD4 LD4 MA_D_ 0.29686716 0.2352608 1.2618640 2.069977e-01
## LD5 LD5 MRB_S_ -0.09427839 0.2244507 -0.4200405 6.744559e-01
## LD6 LD6 MRB_D_ -0.19103922 0.2368507 -0.8065806 4.199082e-01
# Kesimpulan untuk Perempuan
kesimpulan_perempuan <- contributions_df_perempuan %>%
group_by(Variable) %>%
summarise(Significance = list(Significance),
Estimate = list(Estimate))
cat("\nKesimpulan (Perempuan):\n")
##
## Kesimpulan (Perempuan):
for (i in 1:nrow(kesimpulan_perempuan)) {
var <- kesimpulan_perempuan$Variable[i]
signifikan_ld <- contributions_df_perempuan %>% filter(Variable == var & Significance == "Signifikan") %>% select(Discriminant, Estimate)
tidak_signifikan_ld <- contributions_df_perempuan %>% filter(Variable == var & Significance == "Tidak Signifikan") %>% select(Discriminant, Estimate)
cat(var, ":\n")
if (nrow(signifikan_ld) > 0) {
cat(" Signifikan di: \n")
for (j in 1:nrow(signifikan_ld)) {
cat(" ", signifikan_ld$Discriminant[j], "dengan estimasi parameter", signifikan_ld$Estimate[j], "\n")
}
}
if (nrow(tidak_signifikan_ld) > 0) {
cat(" Tidak Signifikan di: \n")
for (j in 1:nrow(tidak_signifikan_ld)) {
cat(" ", tidak_signifikan_ld$Discriminant[j], "dengan estimasi parameter", tidak_signifikan_ld$Estimate[j], "\n")
}
}
}
## MRB_S_ :
## Signifikan di:
## LD1 dengan estimasi parameter -0.5728485
## LD2 dengan estimasi parameter -1.031159
## Tidak Signifikan di:
## LD1 dengan estimasi parameter -0.5728485
## LD2 dengan estimasi parameter -1.031159
## MrB_D_ :
## Tidak Signifikan di:
## LD4 dengan estimasi parameter 0.1166058
## LD6 dengan estimasi parameter 0.09964526
## LD4 dengan estimasi parameter 0.1166058
## LD6 dengan estimasi parameter 0.09964526
## MrB_S_ :
## Tidak Signifikan di:
## LD3 dengan estimasi parameter -0.5294702
## LD5 dengan estimasi parameter 0.2492599
## LD3 dengan estimasi parameter -0.5294702
## LD5 dengan estimasi parameter 0.2492599
# Mengambil estimasi parameter yang signifikan saja untuk Perempuan
selected_estimates_perempuan <- contributions_df_perempuan %>%
filter(Significance == "Signifikan") %>%
select(Variable, Discriminant, Estimate)
cat("\nEstimasi Parameter yang Diambil (Perempuan):\n")
##
## Estimasi Parameter yang Diambil (Perempuan):
print(selected_estimates_perempuan)
## Variable Discriminant Estimate
## 1 MRB_S_ LD1 -0.5728485
## 2 MRB_S_ LD2 -1.0311594
# Kesimpulan untuk Laki-laki
kesimpulan_laki_laki <- contributions_df_laki_laki %>%
group_by(Variable) %>%
summarise(Significance = list(Significance),
Estimate = list(Estimate))
cat("\nKesimpulan (Laki-laki):\n")
##
## Kesimpulan (Laki-laki):
for (i in 1:nrow(kesimpulan_laki_laki)) {
var <- kesimpulan_laki_laki$Variable[i]
signifikan_ld <- contributions_df_laki_laki %>% filter(Variable == var & Significance == "Signifikan") %>% select(Discriminant, Estimate)
tidak_signifikan_ld <- contributions_df_laki_laki %>% filter(Variable == var & Significance == "Tidak Signifikan") %>% select(Discriminant, Estimate)
cat(var, ":\n")
if (nrow(signifikan_ld) > 0) {
cat(" Signifikan di: \n")
for (j in 1:nrow(signifikan_ld)) {
cat(" ", signifikan_ld$Discriminant[j], "dengan estimasi parameter", signifikan_ld$Estimate[j], "\n")
}
}
if (nrow(tidak_signifikan_ld) > 0) {
cat(" Tidak Signifikan di: \n")
for (j in 1:nrow(tidak_signifikan_ld)) {
cat(" ", tidak_signifikan_ld$Discriminant[j], "dengan estimasi parameter", tidak_signifikan_ld$Estimate[j], "\n")
}
}
}
## MA_D_ :
## Tidak Signifikan di:
## LD3 dengan estimasi parameter 0.1796904
## LD4 dengan estimasi parameter 0.2968672
## LD3 dengan estimasi parameter 0.1796904
## LD4 dengan estimasi parameter 0.2968672
## MRB_D_ :
## Tidak Signifikan di:
## LD6 dengan estimasi parameter -0.1910392
## LD6 dengan estimasi parameter -0.1910392
## MRB_S_ :
## Signifikan di:
## LD1 dengan estimasi parameter -1.121186
## Tidak Signifikan di:
## LD5 dengan estimasi parameter -0.09427839
## LD1 dengan estimasi parameter -1.121186
## LD5 dengan estimasi parameter -0.09427839
## MrB_S_ :
## Tidak Signifikan di:
## LD2 dengan estimasi parameter -0.1475567
## LD2 dengan estimasi parameter -0.1475567
# Mengambil estimasi parameter yang signifikan saja untuk Laki-laki
selected_estimates_laki_laki <- contributions_df_laki_laki %>%
filter(Significance == "Signifikan") %>%
select(Variable, Discriminant, Estimate)
cat("\nEstimasi Parameter yang Diambil (Laki-laki):\n")
##
## Estimasi Parameter yang Diambil (Laki-laki):
print(selected_estimates_laki_laki)
## Variable Discriminant Estimate
## 1 MRB_S_ LD1 -1.121186
# Menggabungkan variabel yang sama dan mengambil estimasi dengan p-value paling rendah untuk Perempuan
selected_estimates_perempuan <- contributions_df_perempuan %>%
filter(Significance == "Signifikan") %>%
group_by(Variable) %>%
arrange(p.value) %>%
slice(1) %>%
select(Variable, Discriminant, Estimate, p.value)
cat("\nEstimasi Parameter yang Diambil (Perempuan):\n")
##
## Estimasi Parameter yang Diambil (Perempuan):
print(selected_estimates_perempuan)
## # A tibble: 1 × 4
## # Groups: Variable [1]
## Variable Discriminant Estimate p.value
## <chr> <chr> <dbl> <dbl>
## 1 MRB_S_ LD2 -1.03 0.00000251
# Menggabungkan variabel yang sama dan mengambil estimasi dengan p-value paling rendah untuk Laki-laki
selected_estimates_laki_laki <- contributions_df_laki_laki %>%
filter(Significance == "Signifikan") %>%
group_by(Variable) %>%
arrange(p.value) %>%
slice(1) %>%
select(Variable, Discriminant, Estimate, p.value)
cat("\nEstimasi Parameter yang Diambil (Laki-laki):\n")
##
## Estimasi Parameter yang Diambil (Laki-laki):
print(selected_estimates_laki_laki)
## # A tibble: 1 × 4
## # Groups: Variable [1]
## Variable Discriminant Estimate p.value
## <chr> <chr> <dbl> <dbl>
## 1 MRB_S_ LD1 -1.12 0.0000000487
library(ggplot2)
library(dplyr)
library(readxl)
# Memastikan nama kolom yang benar
colnames(data) <- c("Usia", "Jenis_Kelamin", colnames(data)[3:ncol(data)])
# Menggabungkan data laki-laki dan perempuan
data_combined <- bind_rows(
data %>% filter(Jenis_Kelamin == "Perempuan"),
data %>% filter(Jenis_Kelamin == "Laki-laki")
)
# Memetakan kategori usia
map_usia_category <- function(usia) {
if (usia == 1) {
return("6-10 tahun")
} else if (usia == 2) {
return("11-15 tahun")
} else if (usia == 3) {
return("16-20 tahun")
} else if (usia == 4) {
return("21-25 tahun")
} else if (usia == 5) {
return("26-30 tahun")
} else if (usia == 6) {
return("31-35 tahun")
} else if (usia == 7) {
return("36-40 tahun")
} else if (usia == 8) {
return("41-45 tahun")
} else if (usia == 9) {
return("46-50 tahun")
} else {
return("Unknown")
}
}
data_combined <- data_combined %>%
mutate(Kategori_Usia = sapply(Usia, map_usia_category))
category_order <- c("6-10 tahun", "11-15 tahun", "16-20 tahun", "21-25 tahun",
"26-30 tahun", "31-35 tahun", "36-40 tahun", "41-45 tahun", "46-50 tahun")
data_combined$Kategori_Usia <- factor(data_combined$Kategori_Usia, levels = category_order, ordered = TRUE)
usia_count_combined <- data_combined %>%
group_by(Kategori_Usia, Jenis_Kelamin) %>%
summarise(Frekuensi = n(), .groups = 'drop')
# Membuat barplot dengan ggplot2
barplot_combined <- ggplot(usia_count_combined, aes(x = Kategori_Usia, y = Frekuensi, fill = Jenis_Kelamin)) +
geom_bar(stat = "identity", position = "dodge") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Frekuensi Kategori Usia Berdasarkan Jenis Kelamin", x = "Kategori Usia", y = "Frekuensi") +
scale_fill_manual(values = c("Perempuan" = "pink", "Laki-laki" = "blue"))
print(barplot_combined)

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%() masks rlang::%@%()
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ purrr::flatten() masks rlang::flatten()
## ✖ purrr::flatten_chr() masks rlang::flatten_chr()
## ✖ purrr::flatten_dbl() masks rlang::flatten_dbl()
## ✖ purrr::flatten_int() masks rlang::flatten_int()
## ✖ purrr::flatten_lgl() masks rlang::flatten_lgl()
## ✖ purrr::flatten_raw() masks rlang::flatten_raw()
## ✖ purrr::invoke() masks rlang::invoke()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ plotly::select() masks MASS::select(), dplyr::select()
## ✖ purrr::some() masks car::some()
## ✖ purrr::splice() masks rlang::splice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cutpointr)
##
## Attaching package: 'cutpointr'
##
## The following objects are masked from 'package:pROC':
##
## auc, roc
# Daftar variabel yang akan dianalisis
variables <- colnames(data)[3:ncol(data)]
# Fungsi untuk melakukan cut-point analysis pada setiap variabel
cutpoint_analysis <- function(var, data) {
data <- data %>% drop_na(var) # Menghapus baris yang mengandung NA pada variabel yang dianalisis
median_data <- mean(data[[var]], na.rm = TRUE)
data$class <- ifelse(data[[var]] > median_data, 1, 2)
if (length(unique(data$class)) == 2) { # Pastikan ada dua kelas
opt_cut <- cutpointr(data, !!sym(var), class, method = minimize_metric, metric = misclassification_cost, na.rm = TRUE)
cat("\nAnalisis untuk variabel:", var, "\n")
print(summary(opt_cut))
plot_obj = plot_metric(opt_cut)
print(plot_obj)
# Hitung p-value menggunakan uji statistik yang sesuai
# Contoh: uji t-test untuk membandingkan rata-rata kedua kelompok
p_value <- t.test(data[[var]] ~ data$class)$p.value
cat("P-value:", p_value, "\n")
} else {
cat("\nVariabel", var, "tidak memiliki dua kelas setelah pembagian berdasarkan median.\n")
}
}
# Melakukan analisis untuk setiap variabel
for (var in variables) {
cutpoint_analysis(var, data_perempuan)
}
## Assuming the positive class is 1
## Assuming the positive class has higher x values
##
## Analisis untuk variabel: BB1
## Method: minimize_metric
## Predictor: BB1
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 47 37
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 118.8 0 1 1 1 47 0 0
## tn
## 37
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## Overall 105.0 110.135 114.325 120.0 118.7007 121.95 126.805 131.9 5.350980
## 1 118.8 119.420 121.100 121.7 122.6119 123.85 128.530 131.9 2.815847
## 2 105.0 108.040 112.000 113.8 113.7324 116.80 118.100 118.7 3.246883
## NAs
## 0
## 0
## 0
## P-value: 7.808486e-21
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: BB2
## Method: minimize_metric
## Predictor: BB2
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 43 41
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 94.5 0 1 1 1 43 0 0
## tn
## 41
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 84.3 88.015 90.725 94.55 94.45000 97.525 101.78 106.4 4.655665 0
## 1 94.5 94.730 96.100 97.50 98.19535 99.900 104.22 106.4 2.882951 0
## 2 84.3 86.500 89.130 89.90 90.52195 92.700 93.50 93.8 2.313889 0
## P-value: 2.893983e-22
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: BB3
## Method: minimize_metric
## Predictor: BB3
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 42 42
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 94.4 0 1 1 1 42 0 0
## tn
## 42
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 83.5 86.320 90.975 94.30 94.22738 97.100 100.800 104.4 4.491420 0
## 1 94.4 94.425 95.975 97.10 97.92381 99.725 101.575 104.4 2.499933 0
## 2 83.5 85.225 89.675 90.95 90.53095 92.400 93.790 94.2 2.567953 0
## P-value: 2.708748e-22
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MA_D_
## Method: minimize_metric
## Predictor: MA_D_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 41 43
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 126.7 0 1 1 1 41 0 0
## tn
## 43
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## Overall 104.9 113.925 122.10 126.2 126.3440 130.95 136.805 141.3 7.011185
## 1 126.7 127.200 129.20 131.1 132.0024 134.50 140.700 141.3 3.911233
## 2 104.9 112.030 119.85 122.1 120.9488 124.15 125.790 126.2 4.638677
## NAs
## 0
## 0
## 0
## P-value: 2.594336e-19
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MA_S_
## Method: minimize_metric
## Predictor: MA_S_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 32 52
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 127.8 0 1 1 1 32 0 0
## tn
## 52
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## Overall 104.9 118.175 121.025 125.85 127.7095 131.475 135.240 276.2 17.526219
## 1 127.8 128.210 130.250 132.30 136.9656 134.050 140.165 276.2 25.583621
## 2 104.9 117.370 120.125 122.50 122.0135 124.775 127.035 127.5 3.900831
## NAs
## 0
## 0
## 0
## P-value: 0.002497086
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: RL_D_
## Method: minimize_metric
## Predictor: RL_D_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 40 44
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 54.6 0 1 1 1 40 0 0
## tn
## 44
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 40.8 46.315 51.275 53.95 54.41310 57.975 62.480 69.3 5.214594 0
## 1 54.6 54.890 56.550 58.25 58.67250 59.825 65.200 69.3 3.242347 0
## 2 40.8 43.250 49.775 51.45 50.54091 52.825 53.785 54.1 3.276081 0
## P-value: 1.373489e-18
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MRB_D_
## Method: minimize_metric
## Predictor: MRB_D_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 32 52
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 42.7 0 1 1 1 32 0 0
## tn
## 52
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 30.4 35.615 39.225 41.45 42.39762 44.000 55.035 65.5 5.938236 0
## 1 42.7 43.165 43.975 44.25 47.45938 47.650 62.185 65.5 6.484317 0
## 2 30.4 35.265 38.450 40.00 39.28269 40.875 41.945 42.2 2.421045 0
## P-value: 4.961185e-08
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MRB_S_
## Method: minimize_metric
## Predictor: MRB_S_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 35 49
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 42.7 0 1 1 1 35 0 0
## tn
## 49
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 33.1 35.765 39.9 42.05 42.51726 43.8 54.445 65.4 5.457721 0
## 1 42.7 42.870 43.2 44.10 46.60143 46.4 61.530 65.4 5.951452 0
## 2 33.1 35.380 38.2 40.10 39.60000 41.2 42.200 42.5 2.359378 0
## P-value: 5.641212e-08
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MrB_D_
## Method: minimize_metric
## Predictor: MrB_D_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 38 46
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 31.3 0 1 1 1 38 0 0
## tn
## 46
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 20.7 27.045 29.75 30.95 31.20000 33.200 36.260 40.0 3.212513 0
## 1 31.3 31.570 32.55 33.30 33.81579 33.975 38.725 40.0 2.054991 0
## 2 20.7 23.925 28.70 29.80 29.03913 30.200 31.150 31.2 2.238698 0
## P-value: 3.705876e-16
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MrB_S_
## Method: minimize_metric
## Predictor: MrB_S_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 84 42 42
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 31.9 0 1 1 1 42 0 0
## tn
## 42
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 25.1 27.03 30.100 31.85 31.83095 33.100 37.305 41.5 3.102699 0
## 1 31.9 31.91 32.425 33.10 34.13333 35.075 39.865 41.5 2.517225 0
## 2 25.1 26.71 29.025 30.10 29.52857 30.375 31.200 31.8 1.513724 0

## P-value: 3.238364e-15
library(tidyverse)
library(cutpointr)
# Daftar variabel yang akan dianalisis
variables <- colnames(data)[3:ncol(data)]
# Fungsi untuk melakukan cut-point analysis pada setiap variabel
cutpoint_analysis <- function(var, data) {
data <- data %>% drop_na(var) # Menghapus baris yang mengandung NA pada variabel yang dianalisis
median_data <- mean(data[[var]], na.rm = TRUE)
data$class <- ifelse(data[[var]] > median_data, 1, 2)
if (length(unique(data$class)) == 2) { # Pastikan ada dua kelas
opt_cut <- cutpointr(data, !!sym(var), class, method = minimize_metric, metric = misclassification_cost, na.rm = TRUE)
cat("\nAnalisis untuk variabel:", var, "\n")
print(summary(opt_cut))
plot_obj = plot_metric(opt_cut)
print(plot_obj)
# Hitung p-value menggunakan uji statistik yang sesuai
# Contoh: uji t-test untuk membandingkan rata-rata kedua kelompok
p_value <- t.test(data[[var]] ~ data$class)$p.value
cat("P-value:", p_value, "\n")
} else {
cat("\nVariabel", var, "tidak memiliki dua kelas setelah pembagian berdasarkan median.\n")
}
}
# Melakukan analisis untuk setiap variabel
for (var in variables) {
cutpoint_analysis(var, data_laki_laki)
}
## Assuming the positive class is 1
## Assuming the positive class has higher x values
##
## Analisis untuk variabel: BB1
## Method: minimize_metric
## Predictor: BB1
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 51 31
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 125.1 0 1 1 1 51 0 0
## tn
## 31
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## Overall 111.9 114.915 121.025 125.95 125.0690 127.875 135.09 138.6 5.821568
## 1 125.1 125.300 126.450 127.40 128.6914 129.900 135.60 138.6 3.394217
## 2 111.9 113.700 116.100 119.80 119.1097 121.550 124.60 124.8 3.646858
## NAs
## 0
## 0
## 0
## P-value: 2.476944e-17
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: BB2
## Method: minimize_metric
## Predictor: BB2
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 42 40
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 99.9 0 1 1 1 42 0 0
## tn
## 40
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## Overall 90.0 90.905 96.175 100.1 99.49634 102.900 106.790 124.8 5.766152
## 1 99.9 100.300 101.400 102.9 103.82381 105.575 107.865 124.8 4.192316
## 2 90.0 90.575 92.075 95.7 94.95250 97.625 98.910 99.4 3.038808
## NAs
## 0
## 0
## 0
## P-value: 2.622599e-17
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: BB3
## Method: minimize_metric
## Predictor: BB3
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 41 41
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 101.8 0 1 1 1 41 0 0
## tn
## 41
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## Overall 85.4 91.81 96.2 101.7 101.60000 105.475 113.795 118.0 6.906590
## 1 101.8 102.20 103.4 105.5 107.06829 109.600 114.700 118.0 4.412960
## 2 85.4 88.50 93.8 96.2 96.13171 99.600 101.300 101.6 3.977527
## NAs
## 0
## 0
## 0
## P-value: 4.222631e-19
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MA_D_
## Method: minimize_metric
## Predictor: MA_D_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 47 35
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 125.4 0 1 1 1 47 0 0
## tn
## 35
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## Overall 29.7 113.605 118.65 126.1 124.1024 130.45 135.79 141.6 13.159543
## 1 125.4 125.700 127.75 129.8 130.9532 134.80 136.24 141.6 3.965934
## 2 29.7 107.510 115.15 118.2 114.9029 120.20 122.12 124.1 15.468725
## NAs
## 0
## 0
## 0
## P-value: 6.191557e-07
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MA_S_
## Method: minimize_metric
## Predictor: MA_S_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 47 35
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 125.8 0 1 1 1 47 0 0
## tn
## 35
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD
## Overall 107.4 114.51 120.025 126.2 125.7866 131.55 136.175 145.3 7.649720
## 1 125.8 126.03 127.100 130.4 131.2255 134.70 137.270 145.3 4.382002
## 2 107.4 112.14 115.400 119.0 118.4829 121.55 124.600 125.5 4.198283
## NAs
## 0
## 0
## 0
## P-value: 1.695021e-21
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: RL_D_
## Method: minimize_metric
## Predictor: RL_D_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 34 48
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 61.6 0 1 1 1 34 0 0
## tn
## 48
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 48.0 52.915 57.350 60.00 61.30122 64.075 68.570 128.1 8.946101 0
## 1 61.6 61.995 63.225 64.85 67.18235 67.325 71.475 128.1 11.124842 0
## 2 48.0 52.185 55.700 58.00 57.13542 59.600 60.400 60.8 2.878866 0
## P-value: 9.530662e-06
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MRB_D_
## Method: minimize_metric
## Predictor: MRB_D_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 29 53
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 46.4 0 1 1 1 29 0 0
## tn
## 53
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 36.7 38.715 41.975 45.1 46.20610 48.3 65.615 73.8 7.090915 0
## 1 46.4 46.500 48.000 50.0 52.84828 52.9 68.720 73.8 7.867547 0
## 2 36.7 38.660 40.100 43.2 42.57170 45.0 45.740 46.1 2.631567 0
## P-value: 1.102146e-07
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MRB_S_
## Method: minimize_metric
## Predictor: MRB_S_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 23 59
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 47.8 0 1 1 1 23 0 0
## tn
## 59
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 36.8 39.135 42.50 44.75 47.34146 49.25 68.535 94.1 9.394521 0
## 1 47.8 48.340 50.00 51.00 57.87826 67.30 76.810 94.1 12.061507 0
## 2 36.8 38.610 41.65 43.70 43.23390 45.15 46.910 47.2 2.623673 0
## P-value: 7.279231e-06
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MrB_D_
## Method: minimize_metric
## Predictor: MrB_D_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 34 48
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 33.7 0 1 1 1 34 0 0
## tn
## 48
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 23.0 28.21 31.025 32.50 33.64634 36.500 39.995 45.0 4.129365 0
## 1 33.7 34.13 35.300 37.80 37.67647 39.525 41.590 45.0 2.772600 0
## 2 23.0 27.57 29.900 31.25 30.79167 32.225 33.200 33.4 1.979236 0
## P-value: 1.035574e-17
## Assuming the positive class is 1
## Assuming the positive class has higher x values

##
## Analisis untuk variabel: MrB_S_
## Method: minimize_metric
## Predictor: MrB_S_
## Outcome: class
## Direction: >=
##
## AUC n n_pos n_neg
## 1 82 30 52
##
## optimal_cutpoint misclassification_cost acc sensitivity specificity tp fn fp
## 34.6 0 1 1 1 30 0 0
## tn
## 52
##
## Predictor summary:
## Data Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## Overall 23.0 28.805 31.125 33.40 34.14268 36.375 40.895 45.4 4.250549 0
## 1 34.6 35.145 36.225 38.75 38.73667 40.425 44.540 45.4 2.910798 0
## 2 23.0 28.440 30.575 31.50 31.49231 33.100 34.100 34.1 2.073724 0

## P-value: 8.709931e-16