#install.packages(c("tidyverse", "ggplot2", "corrplot", "car", "nnet", "reshape2", "gridExtra"))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(nnet)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
OPEN FILE
df <- read.csv("glass.csv",
stringsAsFactors = FALSE,
fileEncoding = "UTF-8-BOM")
head(df)
## RI Na Mg Al Si K Ca Ba Fe Type
## 1 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0 0.00 1
## 2 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0 0.00 1
## 3 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0 0.00 1
## 4 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0 0.00 1
## 5 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0 0.00 1
## 6 1.51596 12.79 3.61 1.62 72.97 0.64 8.07 0 0.26 1
PRE-PROCESSING
df <- df %>%
filter(Type %in% c(1, 2, 3)) %>%
mutate(
Type = factor(Type,
levels = c(1, 2, 3),
labels = c("bldg_float",
"bldg_non_float",
"vehicle_float"))
) %>%
drop_na()
df$Type <- relevel(df$Type, ref = "bldg_float")
cat("Dimensi setelah filter 3 kategori:\n")
## Dimensi setelah filter 3 kategori:
cat(" ", nrow(df), "baris x", ncol(df), "kolom\n\n")
## 163 baris x 10 kolom
warna <- c("bldg_float" = "#2A9D8F",
"bldg_non_float" = "#E76F51",
"vehicle_float" = "#457B9D")
str(df)
## 'data.frame': 163 obs. of 10 variables:
## $ RI : num 1.52 1.52 1.52 1.52 1.52 ...
## $ Na : num 13.6 13.9 13.5 13.2 13.3 ...
## $ Mg : num 4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
## $ Al : num 1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
## $ Si : num 71.8 72.7 73 72.6 73.1 ...
## $ K : num 0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
## $ Ca : num 8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
## $ Ba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fe : num 0 0 0 0 0 0.26 0 0 0 0.11 ...
## $ Type: Factor w/ 3 levels "bldg_float","bldg_non_float",..: 1 1 1 1 1 1 1 1 1 1 ...
summary(df)
## RI Na Mg Al
## Min. :1.512 Min. :10.73 Min. :0.000 Min. :0.290
## 1st Qu.:1.517 1st Qu.:12.87 1st Qu.:3.425 1st Qu.:1.145
## Median :1.518 Median :13.21 Median :3.540 Median :1.290
## Mean :1.519 Mean :13.20 Mean :3.295 Mean :1.282
## 3rd Qu.:1.519 3rd Qu.:13.52 3rd Qu.:3.650 3rd Qu.:1.490
## Max. :1.534 Max. :14.86 Max. :4.490 Max. :2.120
## Si K Ca Ba
## Min. :69.81 Min. :0.0000 Min. : 7.080 Min. :0.00000
## 1st Qu.:72.19 1st Qu.:0.3850 1st Qu.: 8.210 1st Qu.:0.00000
## Median :72.74 Median :0.5700 Median : 8.550 Median :0.00000
## Mean :72.59 Mean :0.4775 Mean : 8.925 Mean :0.02982
## 3rd Qu.:73.00 3rd Qu.:0.6100 3rd Qu.: 9.010 3rd Qu.:0.00000
## Max. :74.45 Max. :1.1000 Max. :16.190 Max. :3.15000
## Fe Type
## Min. :0.00000 bldg_float :70
## 1st Qu.:0.00000 bldg_non_float:76
## Median :0.00000 vehicle_float :17
## Mean :0.06761
## 3rd Qu.:0.13000
## Max. :0.37000
tbl <- table(df$Type)
print(data.frame(
Jenis_Kaca = names(tbl),
Frekuensi = as.integer(tbl),
Persen = paste0(round(prop.table(tbl) * 100, 1), "%")
))
## Jenis_Kaca Frekuensi Persen
## 1 bldg_float 70 42.9%
## 2 bldg_non_float 76 46.6%
## 3 vehicle_float 17 10.4%
#make_bp <- function(var, lab) {ggplot(df, aes_string(x = "Type", y = var, fill = "Type")) + geom_boxplot() + scale_fill_manual(values = warna)}
#make_bp("RI", "Refractive Index")
make_bp <- function(var, lab) {
ggplot(df, aes_string(x = "Type", y = var, fill = "Type")) +
geom_boxplot(alpha = 0.8, outlier.color = "red",
outlier.shape = 4, outlier.size = 1.5) +
scale_fill_manual(values = warna) +
scale_x_discrete(labels = c("Float","Non-Float","Vehicle")) +
labs(title = lab, x = "", y = lab) +
theme_bw(base_size = 9) +
theme(legend.position = "none")
}
PEMODELAN
num_vars <- c("RI","Na","Mg","Al","Si","K","Ca","Ba","Fe")
num_df <- df %>% select(all_of(num_vars))
df_bt <- df %>%
mutate(
RI_log = RI * log(RI),
Na_log = Na * log(Na + 0.001),
Mg_log = Mg * log(Mg + 0.001),
Al_log = Al * log(Al + 0.001),
Si_log = Si * log(Si + 0.001),
K_log = K * log(K + 0.001),
Ca_log = Ca * log(Ca + 0.001),
Ba_log = Ba * log(Ba + 0.001),
Fe_log = Fe * log(Fe + 0.001)
)
log_terms <- c("RI_log","Na_log","Mg_log","Al_log","Si_log",
"K_log","Ca_log","Ba_log","Fe_log")
orig_terms <- num_vars
form_bt <- as.formula(paste(
"y_bin ~",
paste(c(orig_terms, log_terms), collapse = " + ")
))
df_bt12 <- df_bt %>%
filter(Type %in% c("bldg_float","bldg_non_float")) %>%
mutate(y_bin = as.integer(Type == "bldg_non_float"))
bt12 <- glm(form_bt, data = df_bt12, family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
p_bt12 <- summary(bt12)$coefficients[log_terms, 4]
df_bt13 <- df_bt %>%
filter(Type %in% c("bldg_float","vehicle_float")) %>%
mutate(y_bin = as.integer(Type == "vehicle_float"))
bt13 <- glm(form_bt, data = df_bt13, family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
p_bt13 <- summary(bt13)$coefficients[log_terms, 4]
UJI ASUMSI
bt_tbl <- data.frame(
Variabel = num_vars,
p_T2_vs_T1 = round(p_bt12, 4),
p_T3_vs_T1 = round(p_bt13, 4),
Status_T2_vs_T1 = ifelse(p_bt12 > 0.05, "✓ Linear", "✗ Tidak"),
Status_T3_vs_T1 = ifelse(p_bt13 > 0.05, "✓ Linear", "✗ Tidak"),
row.names = NULL
)
cat("Hasil Box-Tidwell (p-value X*ln(X)):\n")
## Hasil Box-Tidwell (p-value X*ln(X)):
print(bt_tbl)
## Variabel p_T2_vs_T1 p_T3_vs_T1 Status_T2_vs_T1 Status_T3_vs_T1
## 1 RI 0.2747 0.0155 ✓ Linear ✗ Tidak
## 2 Na 0.1696 0.0926 ✓ Linear ✓ Linear
## 3 Mg 0.0424 0.6920 ✗ Tidak ✓ Linear
## 4 Al 0.2455 0.4313 ✓ Linear ✓ Linear
## 5 Si 0.3851 0.0398 ✓ Linear ✗ Tidak
## 6 K 0.8261 0.1037 ✓ Linear ✓ Linear
## 7 Ca 0.0177 0.3211 ✗ Tidak ✓ Linear
## 8 Ba 0.3265 0.8968 ✓ Linear ✓ Linear
## 9 Fe 0.2972 0.6534 ✓ Linear ✓ Linear
cat("─── ASUMSI 1: LINEARITAS ───\n")
## ─── ASUMSI 1: LINEARITAS ───
n_nl <- sum(p_bt12 < 0.05) + sum(p_bt13 < 0.05)
cat(sprintf("\n=> %d dari 18 uji tidak linear\n", n_nl))
##
## => 4 dari 18 uji tidak linear
cat(ifelse(n_nl <= 4,
"=> Mayoritas LINEAR → Asumsi TERPENUHI ✓\n\n",
"=> Banyak tidak linear, pertimbangkan transformasi\n\n"))
## => Mayoritas LINEAR → Asumsi TERPENUHI ✓
cat("─── ASUMSI 2: INDEPENDENSI OBSERVASI ───\n")
## ─── ASUMSI 2: INDEPENDENSI OBSERVASI ───
n_dup <- sum(duplicated(df))
cat("Jumlah duplikat:", n_dup, "\n")
## Jumlah duplikat: 1
cat("Setiap baris = 1 sampel kaca yang berbeda\n")
## Setiap baris = 1 sampel kaca yang berbeda
cat("Diambil dari laboratorium forensik independen\n")
## Diambil dari laboratorium forensik independen
cat("=> Asumsi Independensi:",
ifelse(n_dup == 0, "TERPENUHI ✓\n\n",
paste("Ada", n_dup, "duplikat → dihapus\n\n")))
## => Asumsi Independensi: Ada 1 duplikat → dihapus
if (n_dup > 0) df <- distinct(df)
cat("─── ASUMSI 3: TIDAK ADA MULTIKOLINEARITAS ───\n")
## ─── ASUMSI 3: TIDAK ADA MULTIKOLINEARITAS ───
cat("Metode : Korelasi Pearson + VIF\n")
## Metode : Korelasi Pearson + VIF
cat("Kriteria: |r| < 0.8 dan VIF < 10\n\n")
## Kriteria: |r| < 0.8 dan VIF < 10
cor_mat <- cor(num_df)
cat("Matriks Korelasi:\n")
## Matriks Korelasi:
print(round(cor_mat, 3))
## RI Na Mg Al Si K Ca Ba Fe
## RI 1.000 -0.069 -0.567 -0.462 -0.691 -0.622 0.882 0.327 0.109
## Na -0.069 1.000 0.340 -0.272 -0.359 -0.367 -0.279 -0.301 -0.194
## Mg -0.567 0.340 1.000 0.006 0.133 0.216 -0.828 -0.287 -0.139
## Al -0.462 -0.272 0.006 1.000 0.171 0.713 -0.376 0.200 0.054
## Si -0.691 -0.359 0.133 0.171 1.000 0.426 -0.450 -0.354 -0.099
## K -0.622 -0.367 0.216 0.713 0.426 1.000 -0.534 0.012 0.075
## Ca 0.882 -0.279 -0.828 -0.376 -0.450 -0.534 1.000 0.241 0.139
## Ba 0.327 -0.301 -0.287 0.200 -0.354 0.012 0.241 1.000 0.174
## Fe 0.109 -0.194 -0.139 0.054 -0.099 0.075 0.139 0.174 1.000
# Deteksi pasangan korelasi tinggi
cat("\nPasangan variabel dengan |r| > 0.6:\n")
##
## Pasangan variabel dengan |r| > 0.6:
cor_up <- which(abs(cor_mat) > 0.6 & upper.tri(cor_mat), arr.ind = TRUE)
if (nrow(cor_up) == 0) {
cat(" Tidak ada\n")
} else {
for (i in seq_len(nrow(cor_up))) {
r1 <- rownames(cor_mat)[cor_up[i,1]]
r2 <- colnames(cor_mat)[cor_up[i,2]]
cat(sprintf(" %s & %s : r = %.3f\n",
r1, r2, cor_mat[cor_up[i,1], cor_up[i,2]]))
}
}
## RI & Si : r = -0.691
## RI & K : r = -0.622
## Al & K : r = 0.713
## RI & Ca : r = 0.882
## Mg & Ca : r = -0.828
Cek VIF
lm_vif <- lm(as.numeric(Type) ~ RI + Na + Mg + Al + Si + K + Ca + Ba + Fe,
data = df)
vif_v <- vif(lm_vif)
vif_tbl <- data.frame(
Variabel = names(vif_v),
VIF = round(vif_v, 3),
Status = ifelse(vif_v < 5, "✓ OK (< 5)",
ifelse(vif_v < 10, "⚠ Perhatian (5–10)",
"✗ Masalah (> 10)"))
)
cat("\nVIF:\n")
##
## VIF:
print(vif_tbl)
## Variabel VIF Status
## RI RI 13.258 ✗ Masalah (> 10)
## Na Na 31.051 ✗ Masalah (> 10)
## Mg Mg 75.729 ✗ Masalah (> 10)
## Al Al 12.162 ✗ Masalah (> 10)
## Si Si 37.072 ✗ Masalah (> 10)
## K K 7.804 ⚠ Perhatian (5–10)
## Ca Ca 199.254 ✗ Masalah (> 10)
## Ba Ba 7.513 ⚠ Perhatian (5–10)
## Fe Fe 1.148 ✓ OK (< 5)
cat("=> Asumsi Multikolinearitas:",
ifelse(all(vif_v < 10), "TERPENUHI ✓\n\n", "PERLU DITANGANI\n\n"))
## => Asumsi Multikolinearitas: PERLU DITANGANI
Cek Outliers
z_df <- df %>% select(all_of(num_vars)) %>%
mutate(across(everything(), ~abs(scale(.)[,1])))
out_z <- z_df %>% summarise(across(everything(), ~sum(. > 3)))
out_tbl <- data.frame(
Variabel = names(out_z),
Outlier_Zscore = as.integer(out_z),
Status = ifelse(as.integer(out_z) == 0, "✓ Tidak ada", "⚠ Ada")
)
cat("Outlier per variabel (|z| > 3):\n")
## Outlier per variabel (|z| > 3):
print(out_tbl)
## Variabel Outlier_Zscore Status
## 1 RI 3 ⚠ Ada
## 2 Na 3 ⚠ Ada
## 3 Mg 9 ⚠ Ada
## 4 Al 1 ⚠ Ada
## 5 Si 3 ⚠ Ada
## 6 K 0 ✓ Tidak ada
## 7 Ca 7 ⚠ Ada
## 8 Ba 1 ⚠ Ada
## 9 Fe 1 ⚠ Ada
df_clean <- df %>%
filter(if_all(all_of(num_vars), ~ abs(scale(.)[,1]) <= 3))
cat(sprintf("\nBaris sebelum hapus outlier : %d\n", nrow(df)))
##
## Baris sebelum hapus outlier : 162
cat(sprintf("Baris setelah hapus outlier : %d\n", nrow(df_clean)))
## Baris setelah hapus outlier : 150
cat(sprintf("Outlier yang dihapus : %d baris\n\n", nrow(df) - nrow(df_clean)))
## Outlier yang dihapus : 12 baris
num_df_clean <- df_clean %>% select(all_of(num_vars))
Cek Korelasi
cor_mat <- cor(num_df_clean)
cat("Matriks Korelasi:\n")
## Matriks Korelasi:
print(round(cor_mat, 3))
## RI Na Mg Al Si K Ca Ba Fe
## RI 1.000 0.410 -0.010 -0.644 -0.682 -0.692 0.810 0.105 -0.024
## Na 0.410 1.000 0.053 -0.230 -0.758 -0.534 0.184 0.111 -0.169
## Mg -0.010 0.053 1.000 -0.254 -0.102 -0.128 -0.396 -0.111 -0.098
## Al -0.644 -0.230 -0.254 1.000 0.280 0.651 -0.592 0.012 -0.051
## Si -0.682 -0.758 -0.102 0.280 1.000 0.541 -0.506 -0.139 0.065
## K -0.692 -0.534 -0.128 0.651 0.541 1.000 -0.609 -0.139 0.040
## Ca 0.810 0.184 -0.396 -0.592 -0.506 -0.609 1.000 0.056 0.095
## Ba 0.105 0.111 -0.111 0.012 -0.139 -0.139 0.056 1.000 0.052
## Fe -0.024 -0.169 -0.098 -0.051 0.065 0.040 0.095 0.052 1.000
cat("\nPasangan variabel dengan |r| > 0.6:\n")
##
## Pasangan variabel dengan |r| > 0.6:
cor_up <- which(abs(cor_mat) > 0.6 & upper.tri(cor_mat), arr.ind = TRUE)
if (nrow(cor_up) == 0) {
cat(" Tidak ada\n")
} else {
for (i in seq_len(nrow(cor_up))) {
r1 <- rownames(cor_mat)[cor_up[i,1]]
r2 <- colnames(cor_mat)[cor_up[i,2]]
cat(sprintf(" %s & %s : r = %.3f\n",
r1, r2, cor_mat[cor_up[i,1], cor_up[i,2]]))
}
}
## RI & Al : r = -0.644
## RI & Si : r = -0.682
## Na & Si : r = -0.758
## RI & K : r = -0.692
## Al & K : r = 0.651
## RI & Ca : r = 0.810
## K & Ca : r = -0.609
Cek FIV ke-2
lm_vif <- lm(as.numeric(Type) ~ RI + Na + Mg + Al + Si + K + Ca + Ba + Fe,
data = df_clean) # <-- df_clean
vif_v <- vif(lm_vif)
vif_tbl <- data.frame(
Variabel = names(vif_v),
VIF = round(vif_v, 3),
Status = ifelse(vif_v < 5, "✓ OK (< 5)",
ifelse(vif_v < 10, "⚠ Perhatian (5–10)",
"✗ Masalah (> 10)"))
)
cat("\nVIF:\n")
##
## VIF:
print(vif_tbl)
## Variabel VIF Status
## RI RI 5.885 ⚠ Perhatian (5–10)
## Na Na 18.137 ✗ Masalah (> 10)
## Mg Mg 16.832 ✗ Masalah (> 10)
## Al Al 10.642 ✗ Masalah (> 10)
## Si Si 23.731 ✗ Masalah (> 10)
## K K 6.393 ⚠ Perhatian (5–10)
## Ca Ca 46.013 ✗ Masalah (> 10)
## Ba Ba 1.368 ✓ OK (< 5)
## Fe Fe 1.108 ✓ OK (< 5)
cat("=> Asumsi Multikolinearitas:",
ifelse(all(vif_v < 10), "TERPENUHI ✓\n\n", "PERLU DITANGANI\n\n"))
## => Asumsi Multikolinearitas: PERLU DITANGANI
Iterasi
cat("=== Iterasi 1: Hapus Ca ===\n")
## === Iterasi 1: Hapus Ca ===
lm_vif2 <- lm(as.numeric(Type) ~ RI + Na + Mg + Al + Si + K + Ba + Fe,
data = df_clean) # Ca dihapus
vif_v2 <- vif(lm_vif2)
vif_tbl2 <- data.frame(
Variabel = names(vif_v2),
VIF = round(vif_v2, 3),
Status = ifelse(vif_v2 < 5, "✓ OK (< 5)",
ifelse(vif_v2 < 10, "⚠ Perhatian (5–10)",
"✗ Masalah (> 10)"))
)
print(vif_tbl2)
## Variabel VIF Status
## RI RI 4.815 ✓ OK (< 5)
## Na Na 3.298 ✓ OK (< 5)
## Mg Mg 1.323 ✓ OK (< 5)
## Al Al 2.802 ✓ OK (< 5)
## Si Si 5.142 ⚠ Perhatian (5–10)
## K K 2.810 ✓ OK (< 5)
## Ba Ba 1.060 ✓ OK (< 5)
## Fe Fe 1.091 ✓ OK (< 5)
cat("=> Masih ada VIF > 10:", sum(vif_v2 > 10), "variabel\n\n")
## => Masih ada VIF > 10: 0 variabel
cat("=== Iterasi 2: Hapus Si ===\n")
## === Iterasi 2: Hapus Si ===
lm_vif3 <- lm(as.numeric(Type) ~ RI + Na + Mg + Al + K + Ba + Fe,
data = df_clean) # Ca dan Si dihapus
vif_v3 <- vif(lm_vif3)
vif_tbl3 <- data.frame(
Variabel = names(vif_v3),
VIF = round(vif_v3, 3),
Status = ifelse(vif_v3 < 5, "✓ OK (< 5)",
ifelse(vif_v3 < 10, "⚠ Perhatian (5–10)",
"✗ Masalah (> 10)"))
)
print(vif_tbl3)
## Variabel VIF Status
## RI RI 2.392 ✓ OK (< 5)
## Na Na 1.518 ✓ OK (< 5)
## Mg Mg 1.168 ✓ OK (< 5)
## Al Al 2.355 ✓ OK (< 5)
## K K 2.778 ✓ OK (< 5)
## Ba Ba 1.057 ✓ OK (< 5)
## Fe Fe 1.060 ✓ OK (< 5)
cat("=> Masih ada VIF > 10:", sum(vif_v3 > 10), "variabel\n\n")
## => Masih ada VIF > 10: 0 variabel
cat("=== PERBANDINGAN ===\n")
## === PERBANDINGAN ===
cat(sprintf("Tanpa Ca : maks VIF = %.3f | variabel masalah = %d\n",
max(vif_v2), sum(vif_v2 > 10)))
## Tanpa Ca : maks VIF = 5.142 | variabel masalah = 0
cat(sprintf("Tanpa Ca & Si : maks VIF = %.3f | variabel masalah = %d\n",
max(vif_v3), sum(vif_v3 > 10)))
## Tanpa Ca & Si : maks VIF = 2.778 | variabel masalah = 0
# Simpan variabel final sesuai hasil terbaik
# Jika iterasi 1 cukup (semua VIF < 10):
vars_final <- c("RI", "Na", "Mg", "Al", "Si", "K", "Ba", "Fe") # tanpa Ca
df_final <- df_clean %>% select(all_of(vars_final), Type)
cat("\nDataset final:", nrow(df_final), "baris x", ncol(df_final), "kolom\n")
##
## Dataset final: 150 baris x 9 kolom
cat("Variabel prediktor:", paste(vars_final, collapse = ", "), "\n")
## Variabel prediktor: RI, Na, Mg, Al, Si, K, Ba, Fe
# Correlation matrix tanpa Ca dan Si
num_vars_noCa <- c("RI", "Na", "Mg", "Al", "K", "Ba", "Fe")
num_df_noCa <- df_clean %>% select(all_of(num_vars_noCa))
cor_mat_noCa <- cor(num_df_noCa)
cat("Matriks Korelasi (tanpa Ca):\n")
## Matriks Korelasi (tanpa Ca):
print(round(cor_mat_noCa, 3))
## RI Na Mg Al K Ba Fe
## RI 1.000 0.410 -0.010 -0.644 -0.692 0.105 -0.024
## Na 0.410 1.000 0.053 -0.230 -0.534 0.111 -0.169
## Mg -0.010 0.053 1.000 -0.254 -0.128 -0.111 -0.098
## Al -0.644 -0.230 -0.254 1.000 0.651 0.012 -0.051
## K -0.692 -0.534 -0.128 0.651 1.000 -0.139 0.040
## Ba 0.105 0.111 -0.111 0.012 -0.139 1.000 0.052
## Fe -0.024 -0.169 -0.098 -0.051 0.040 0.052 1.000
cat("\nPasangan variabel dengan |r| > 0.6:\n")
##
## Pasangan variabel dengan |r| > 0.6:
cor_up <- which(abs(cor_mat_noCa) > 0.6 & upper.tri(cor_mat_noCa), arr.ind = TRUE)
if (nrow(cor_up) == 0) {
cat(" Tidak ada\n")
} else {
for (i in seq_len(nrow(cor_up))) {
r1 <- rownames(cor_mat_noCa)[cor_up[i, 1]]
r2 <- colnames(cor_mat_noCa)[cor_up[i, 2]]
cat(sprintf(" %s & %s : r = %.3f\n",
r1, r2, cor_mat_noCa[cor_up[i,1], cor_up[i,2]]))
}
}
## RI & Al : r = -0.644
## RI & K : r = -0.692
## Al & K : r = 0.651
# Dataset final siap untuk model
vars_final <- c("RI", "Na", "Mg", "Al", "K", "Ba", "Fe")
df_final <- df_clean %>% select(all_of(vars_final), Type)
cat("=> Asumsi Multikolinearitas: TERPENUHI ✓\n")
## => Asumsi Multikolinearitas: TERPENUHI ✓
cat(" Variabel Ca dan Si dihapus (VIF > 10)\n")
## Variabel Ca dan Si dihapus (VIF > 10)
cat(" Semua VIF tersisa < 5\n")
## Semua VIF tersisa < 5
cat(" Variabel final:", paste(vars_final, collapse = ", "), "\n")
## Variabel final: RI, Na, Mg, Al, K, Ba, Fe
cat(" Dataset:", nrow(df_final), "baris x", ncol(df_final), "kolom\n")
## Dataset: 150 baris x 8 kolom
head(df_final)
## RI Na Mg Al K Ba Fe Type
## 1 1.52101 13.64 4.49 1.10 0.06 0 0.00 bldg_float
## 2 1.51761 13.89 3.60 1.36 0.48 0 0.00 bldg_float
## 3 1.51618 13.53 3.55 1.54 0.39 0 0.00 bldg_float
## 4 1.51766 13.21 3.69 1.29 0.57 0 0.00 bldg_float
## 5 1.51742 13.27 3.62 1.24 0.55 0 0.00 bldg_float
## 6 1.51596 12.79 3.61 1.62 0.64 0 0.26 bldg_float
sum(is.na(df_final))
## [1] 0
MODEL REGRESI LOGISTIK MULTINOMIAL
model_multinom <- multinom(
as.formula(paste("Type ~", paste(vars_final, collapse = " + "))),
data = df_final,
trace = FALSE
)
cat("ESTIMASI PARAMETER MODEL\n")
## ESTIMASI PARAMETER MODEL
print(summary(model_multinom))
## Call:
## multinom(formula = as.formula(paste("Type ~", paste(vars_final,
## collapse = " + "))), data = df_final, trace = FALSE)
##
## Coefficients:
## (Intercept) RI Na Mg Al K
## bldg_non_float -168.2918 97.05469 1.037690 -0.2245978 5.25030219 1.609452
## vehicle_float 625.8205 -422.13752 1.250729 -0.5492412 0.09324865 -2.336724
## Ba Fe
## bldg_non_float -1.898337 3.993628
## vehicle_float -1.643733 -2.508314
##
## Std. Errors:
## (Intercept) RI Na Mg Al K
## bldg_non_float 2.678513 4.068030 0.6266196 0.5858427 1.196128 1.803303
## vehicle_float 3.789424 5.775927 0.8832948 0.9019059 1.438085 2.141522
## Ba Fe
## bldg_non_float 3.589812 2.184576
## vehicle_float 4.283600 3.753342
##
## Residual Deviance: 231.1288
## AIC: 263.1288
UJI SERENTAK (Likelihood Ratio Test / G²)
cat("UJI SERENTAK\n")
## UJI SERENTAK
cat("H0 : β1 = β2 = ... = βk = 0 (variabel prediktor tidak signifikan terhadap model)\n")
## H0 : β1 = β2 = ... = βk = 0 (variabel prediktor tidak signifikan terhadap model)
cat("H1 : minimal ada satu βi ≠ 0 (i = 1,2,...,k)\n\n")
## H1 : minimal ada satu βi ≠ 0 (i = 1,2,...,k)
model_null <- multinom(Type ~ 1, data = df_final, trace = FALSE)
LL_null <- logLik(model_null)
LL_full <- logLik(model_multinom)
G2 <- as.numeric(-2 * (LL_null - LL_full))
df_G2 <- attr(LL_full, "df") - attr(LL_null, "df")
p_G2 <- pchisq(G2, df = df_G2, lower.tail = FALSE)
chi_tabel <- qchisq(0.95, df = df_G2)
uji_serentak <- data.frame(
Model = c("Intercept Only", "Final"),
`-2LogLikelihood` = c(round(-2 * as.numeric(LL_null), 3),
round(-2 * as.numeric(LL_full), 3)),
Chi_Square = c(NA, round(G2, 3)),
df = c(NA, df_G2),
p_value = c(NA, round(p_G2, 4))
)
print(uji_serentak)
## Model X.2LogLikelihood Chi_Square df p_value
## 1 Intercept Only 287.580 NA NA NA
## 2 Final 231.129 56.452 14 0
cat(sprintf("\nG² hitung = %.3f\n", G2))
##
## G² hitung = 56.452
cat(sprintf("χ² tabel (df=%d, α=0.05) = %.3f\n", df_G2, chi_tabel))
## χ² tabel (df=14, α=0.05) = 23.685
cat(sprintf("p-value = %.4f\n\n", p_G2))
## p-value = 0.0000
cat(ifelse(G2 > chi_tabel & p_G2 < 0.05,
"Keputusan : Tolak H0\nKesimpulan: Minimal ada satu variabel prediktor yang berpengaruh signifikan\n",
"Keputusan : Gagal Tolak H0\nKesimpulan: Tidak ada variabel prediktor yang signifikan\n"))
## Keputusan : Tolak H0
## Kesimpulan: Minimal ada satu variabel prediktor yang berpengaruh signifikan
UJI PARSIAL (Wald Test)
cat("UJI PARSIAL (Wald Test)\n")
## UJI PARSIAL (Wald Test)
cat("H0 : βj = 0\n")
## H0 : βj = 0
cat("H1 : βj ≠ 0 ; j = 1, 2, ..., p\n")
## H1 : βj ≠ 0 ; j = 1, 2, ..., p
cat("Kriteria: Tolak H0 jika |W| > 1.96 atau p-value < 0.05\n\n")
## Kriteria: Tolak H0 jika |W| > 1.96 atau p-value < 0.05
coef_mat <- summary(model_multinom)$coefficients
se_mat <- summary(model_multinom)$standard.errors
W_mat <- coef_mat / se_mat
p_mat <- 2 * (1 - pnorm(abs(W_mat)))
kategori <- rownames(coef_mat)
for (kat in kategori) {
cat(sprintf("══ Model Logit: %s vs bldg_float (referensi) ══\n", kat))
parsial_tbl <- data.frame(
Variabel = colnames(coef_mat),
B = round(coef_mat[kat, ], 4),
SE = round(se_mat[kat, ], 4),
Wald_W = round(W_mat[kat, ], 4),
df = 1,
p_value = round(p_mat[kat, ], 4),
Keputusan = ifelse(p_mat[kat, ] < 0.05, "Tolak H0", "Gagal Tolak H0"),
row.names = NULL
)
print(parsial_tbl)
cat("\n")
}
## ══ Model Logit: bldg_non_float vs bldg_float (referensi) ══
## Variabel B SE Wald_W df p_value Keputusan
## 1 (Intercept) -168.2918 2.6785 -62.8303 1 0.0000 Tolak H0
## 2 RI 97.0547 4.0680 23.8579 1 0.0000 Tolak H0
## 3 Na 1.0377 0.6266 1.6560 1 0.0977 Gagal Tolak H0
## 4 Mg -0.2246 0.5858 -0.3834 1 0.7014 Gagal Tolak H0
## 5 Al 5.2503 1.1961 4.3894 1 0.0000 Tolak H0
## 6 K 1.6095 1.8033 0.8925 1 0.3721 Gagal Tolak H0
## 7 Ba -1.8983 3.5898 -0.5288 1 0.5969 Gagal Tolak H0
## 8 Fe 3.9936 2.1846 1.8281 1 0.0675 Gagal Tolak H0
##
## ══ Model Logit: vehicle_float vs bldg_float (referensi) ══
## Variabel B SE Wald_W df p_value Keputusan
## 1 (Intercept) 625.8205 3.7894 165.1492 1 0.0000 Tolak H0
## 2 RI -422.1375 5.7759 -73.0857 1 0.0000 Tolak H0
## 3 Na 1.2507 0.8833 1.4160 1 0.1568 Gagal Tolak H0
## 4 Mg -0.5492 0.9019 -0.6090 1 0.5425 Gagal Tolak H0
## 5 Al 0.0932 1.4381 0.0648 1 0.9483 Gagal Tolak H0
## 6 K -2.3367 2.1415 -1.0912 1 0.2752 Gagal Tolak H0
## 7 Ba -1.6437 4.2836 -0.3837 1 0.7012 Gagal Tolak H0
## 8 Fe -2.5083 3.7533 -0.6683 1 0.5039 Gagal Tolak H0
ODDS RATIO
cat("─── ODDS RATIO ───\n")
## ─── ODDS RATIO ───
odds_ratio <- exp(coef(model_multinom))
for(kat in rownames(odds_ratio)) {
cat(sprintf("\n─── Odds Ratio: %s vs bldg_float ───\n", kat))
or_tbl <- data.frame(
variabel = colnames(odds_ratio),
Odds_Ratio = round(odds_ratio[kat, ], 4),
row.names = NULL
)
print(or_tbl)
}
##
## ─── Odds Ratio: bldg_non_float vs bldg_float ───
## variabel Odds_Ratio
## 1 (Intercept) 0.000000e+00
## 2 RI 1.413571e+42
## 3 Na 2.822700e+00
## 4 Mg 7.988000e-01
## 5 Al 1.906239e+02
## 6 K 5.000100e+00
## 7 Ba 1.498000e-01
## 8 Fe 5.425130e+01
##
## ─── Odds Ratio: vehicle_float vs bldg_float ───
## variabel Odds_Ratio
## 1 (Intercept) 6.171331e+271
## 2 RI 0.000000e+00
## 3 Na 3.492900e+00
## 4 Mg 5.774000e-01
## 5 Al 1.097700e+00
## 6 K 9.660000e-02
## 7 Ba 1.933000e-01
## 8 Fe 8.140000e-02
cat("\nInterpretasi:\n")
##
## Interpretasi:
cat("Odds Ratio > 1 : meningkatkan peluang kategori dibanding referensi\n")
## Odds Ratio > 1 : meningkatkan peluang kategori dibanding referensi
cat("Odds Ratio < 1 : menurunkan peluang kategori dibanding referensi\n")
## Odds Ratio < 1 : menurunkan peluang kategori dibanding referensi
cat("Odds Ratio = 1 : tidak berpengaruh\n")
## Odds Ratio = 1 : tidak berpengaruh
Prediksi Model
pred <- predict(model_multinom, df_final)
cat("─── CONFUSION MATRIX ───\n")
## ─── CONFUSION MATRIX ───
conf_matrix <- table(Prediksi = pred, Aktual = df_final$Type)
print(conf_matrix)
## Aktual
## Prediksi bldg_float bldg_non_float vehicle_float
## bldg_float 54 16 9
## bldg_non_float 14 50 5
## vehicle_float 0 0 2
Akuarasi Model
accuracy <- mean(pred == df_final$Type)
cat("─── Akurasi Model ───\n")
## ─── Akurasi Model ───
cat(sprintf("Akurasi: %.4f (%.2f%%)\n", accuracy, accuracy * 100))
## Akurasi: 0.7067 (70.67%)
prob <- predict(model_multinom, df_final, type = "probs")
cat("\n─── Contoh Probabilitas (5 observasi pertama) ───\n")
##
## ─── Contoh Probabilitas (5 observasi pertama) ───
head(prob)
## bldg_float bldg_non_float vehicle_float
## 1 0.7276010 0.1393034 0.13309559
## 2 0.2998410 0.5029701 0.19718893
## 3 0.2352301 0.5321467 0.23262322
## 4 0.5365835 0.3503174 0.11309918
## 5 0.5598899 0.2875347 0.15257539
## 6 0.1309165 0.8531423 0.01594123