Library & Load Dataset
Load Library
# Library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(MVN)
library(biotools)
## ---
## biotools version 4.3
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(nnet)
library(corrplot)
## corrplot 0.95 loaded
Load Dataset
# Read the zoo data set from disk
df <- read.csv(file = "zoo.csv")
# Store the target as a factor
df[["class_type"]] <- as.factor(df[["class_type"]])
# Drop the name column; select() is namespaced because MASS masks dplyr::select
df <- dplyr::select(df, -animal_name)
head(df)
## hair feathers eggs milk airborne aquatic predator toothed backbone breathes
## 1 1 0 0 1 0 0 1 1 1 1
## 2 1 0 0 1 0 0 0 1 1 1
## 3 0 0 1 0 0 1 1 1 1 0
## 4 1 0 0 1 0 0 1 1 1 1
## 5 1 0 0 1 0 0 1 1 1 1
## 6 1 0 0 1 0 0 0 1 1 1
## venomous fins legs tail domestic catsize class_type
## 1 0 0 4 0 0 1 1
## 2 0 0 4 1 0 1 1
## 3 0 1 0 1 0 0 4
## 4 0 0 4 0 0 1 1
## 5 0 0 4 1 0 1 1
## 6 0 0 4 1 0 1 1
Data Understanding
Dimensi Data
# Number of rows and columns
dim(df)
## [1] 101 17
Struktur Data
# Column names, storage types and leading values
str(df)
## 'data.frame': 101 obs. of 17 variables:
## $ hair : int 1 1 0 1 1 1 1 0 0 1 ...
## $ feathers : int 0 0 0 0 0 0 0 0 0 0 ...
## $ eggs : int 0 0 1 0 0 0 0 1 1 0 ...
## $ milk : int 1 1 0 1 1 1 1 0 0 1 ...
## $ airborne : int 0 0 0 0 0 0 0 0 0 0 ...
## $ aquatic : int 0 0 1 0 0 0 0 1 1 0 ...
## $ predator : int 1 0 1 1 1 0 0 0 1 0 ...
## $ toothed : int 1 1 1 1 1 1 1 1 1 1 ...
## $ backbone : int 1 1 1 1 1 1 1 1 1 1 ...
## $ breathes : int 1 1 0 1 1 1 1 0 0 1 ...
## $ venomous : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fins : int 0 0 1 0 0 0 0 1 1 0 ...
## $ legs : int 4 4 0 4 4 4 4 0 0 4 ...
## $ tail : int 0 1 1 0 1 1 1 1 1 0 ...
## $ domestic : int 0 0 0 0 0 0 1 1 0 1 ...
## $ catsize : int 1 1 0 1 1 1 1 0 0 0 ...
## $ class_type: Factor w/ 7 levels "1","2","3","4",..: 1 1 4 1 1 1 1 4 4 1 ...
Ringkasan data
# Per-column summary statistics; the factor target is shown as level counts
summary(df)
## hair feathers eggs milk
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :1.0000 Median :0.0000
## Mean :0.4257 Mean :0.198 Mean :0.5842 Mean :0.4059
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
##
## airborne aquatic predator toothed
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :1.000
## Mean :0.2376 Mean :0.3564 Mean :0.5545 Mean :0.604
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
##
## backbone breathes venomous fins
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.00000 Median :0.0000
## Mean :0.8218 Mean :0.7921 Mean :0.07921 Mean :0.1683
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
##
## legs tail domestic catsize class_type
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000 1:41
## 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 2:20
## Median :4.000 Median :1.0000 Median :0.0000 Median :0.0000 3: 5
## Mean :2.842 Mean :0.7426 Mean :0.1287 Mean :0.4356 4:13
## 3rd Qu.:4.000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 5: 4
## Max. :8.000 Max. :1.0000 Max. :1.0000 Max. :1.0000 6: 8
## 7:10
Distribusi Target
# Number of observations in each class
table(df$class_type)
##
## 1 2 3 4 5 6 7
## 41 20 5 13 4 8 10
Data Preprocessing
Ambil Variabel Numerik
# Keep only the numeric columns (this leaves out the factor target class_type)
df_num <- df %>% dplyr::select(where(is.numeric))
Cek Missing Value
# Count missing values in each column
colSums(is.na(df))
## hair feathers eggs milk airborne aquatic predator
## 0 0 0 0 0 0 0
## toothed backbone breathes venomous fins legs tail
## 0 0 0 0 0 0 0
## domestic catsize class_type
## 0 0 0
Cek Duplikasi
# Count rows that are exact duplicates of an earlier row
# NOTE(review): duplicates are only counted here, not removed — confirm that
# identical rows ending up in both train and test is acceptable
sum(duplicated(df))
## [1] 42
Exploratory Data Analysis (EDA)
Distribusi Class & Korelasi Variabel
# Bar chart of the number of observations per class
ggplot(data = df, mapping = aes(x = class_type)) +
  geom_bar() +
  labs(title = "Distribusi Class Type")

# Correlation heatmap of the numeric columns.
# df_num is already built in the preprocessing step and corrplot is attached
# with the other packages at the top, so neither is repeated here.
corrplot(cor(df_num), method = "color")

Boxplot Per Variabel
# One boxplot panel per predictor, split by class
df_long <- pivot_longer(df, cols = -class_type)
ggplot(data = df_long, mapping = aes(x = class_type, y = value)) +
  geom_boxplot() +
  facet_wrap(~name, scales = "free")

Statistika Deskriptif
Summary Statistik
# Summary statistics for every column (same call as in the data-understanding step)
summary(df)
## hair feathers eggs milk
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :1.0000 Median :0.0000
## Mean :0.4257 Mean :0.198 Mean :0.5842 Mean :0.4059
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
##
## airborne aquatic predator toothed
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :1.000
## Mean :0.2376 Mean :0.3564 Mean :0.5545 Mean :0.604
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
##
## backbone breathes venomous fins
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.00000 Median :0.0000
## Mean :0.8218 Mean :0.7921 Mean :0.07921 Mean :0.1683
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
##
## legs tail domestic catsize class_type
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000 1:41
## 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 2:20
## Median :4.000 Median :1.0000 Median :0.0000 Median :0.0000 3: 5
## Mean :2.842 Mean :0.7426 Mean :0.1287 Mean :0.4356 4:13
## 3rd Qu.:4.000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 5: 4
## Max. :8.000 Max. :1.0000 Max. :1.0000 Max. :1.0000 6: 8
## 7:10
Uji Asumsi LDA
Normalitas Multivariat
# Print the signature of MVN::mvn() to see which arguments the installed version accepts
args(MVN::mvn)
## function (data, subset = NULL, mvn_test = "hz", use_population = TRUE,
## tol = 1e-25, alpha = 0.05, outlier_seed = NULL, scale = FALSE,
## descriptives = TRUE, transform = "none", impute = "none",
## bootstrap = FALSE, B = 1000, cores = 1, univariate_test = "AD",
## multivariate_outlier_method = "none", power_family = "none",
## power_transform_type = "optimal", show_new_data = FALSE,
## tidy = TRUE)
## NULL
# Run the multivariate normality test on the numeric columns (the default
# mvn_test shown in the signature is "hz").
mvn_result <- MVN::mvn(data = df_num)
# In the snake_case MVN API shown by args() the result element is
# `multivariate_normality`; the camelCase `multivariateNormality` yields NULL.
mvn_result$multivariate_normality
## NULL
Homogenitas Kovarians
# Box's M test of equal covariance matrices across the class_type groups
# NOTE(review): the recorded run warns that some classes have fewer
# observations than variables and reports NaN / NA, so the test gives no
# usable answer on this data — confirm how the assumption should be assessed
boxM(df_num, df$class_type)
## Warning in boxM(df_num, df$class_type): there are one or more levels with less
## observations than variables!
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df_num
## Chi-Sq (approx.) = NaN, df = 816, p-value = NA
# Class sizes, for comparison with the number of variables used in the test above
table(df$class_type)
##
## 1 2 3 4 5 6 7
## 41 20 5 13 4 8 10
Split Data
Train-Test Split
# Fix the RNG seed so the partition is reproducible
set.seed(123)
# Draw row indices for an 80% training partition based on class_type
trainIndex <- createDataPartition(df$class_type, p = 0.8, list = FALSE)
# Indexed rows form the training set; all remaining rows form the test set
train <- df[trainIndex, ]
test <- df[-trainIndex, ]
Linear Discriminant Analysis (LDA)
Pembuatan Model
#' Find predictors that are not constant within groups
#'
#' A predictor is kept when it takes more than one distinct value inside at
#' least one group, i.e. its pooled within-group variance is non-zero. The
#' previous rule required variation inside *every* group, which discarded
#' almost all predictors.
#'
#' @param df A data frame holding the predictors and the grouping column.
#' @param target Name of the grouping column, as a string.
#' @return Character vector of predictor names to keep, in column order.
remove_constant <- function(df, target) {
  predictors <- setdiff(names(df), target)
  groups <- df[[target]]

  varies <- vapply(
    predictors,
    function(col) {
      # drop = TRUE skips factor levels that have no rows
      per_group <- split(df[[col]], groups, drop = TRUE)
      any(vapply(per_group, function(v) length(unique(v)) > 1, logical(1)))
    },
    logical(1)
  )

  predictors[unname(varies)]
}
# Predictors that survive the within-group constancy filter on the training set
cols_keep <- remove_constant(train, "class_type")
# Apply the same column set to both partitions, keeping the target
train_clean <- dplyr::select(train, all_of(cols_keep), class_type)
test_clean <- dplyr::select(test, all_of(cols_keep), class_type)
Evaluasi Model LDA
# Fit LDA on the filtered training predictors
lda_model <- lda(class_type ~ ., data = train_clean)
# Classify the held-out rows
lda_pred <- predict(lda_model, newdata = test_clean)
# Compare predicted and true classes
confusionMatrix(data = lda_pred$class, reference = test_clean$class_type)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4 5 6 7
## 1 8 4 1 2 0 1 2
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## 7 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.4444
## 95% CI : (0.2153, 0.6924)
## No Information Rate : 0.4444
## P-Value [Acc > NIR] : 0.59
##
## Kappa : 0
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity 1.0000 0.0000 0.00000 0.0000 NA 0.00000
## Specificity 0.0000 1.0000 1.00000 1.0000 1 1.00000
## Pos Pred Value 0.4444 NaN NaN NaN NA NaN
## Neg Pred Value NaN 0.7778 0.94444 0.8889 NA 0.94444
## Prevalence 0.4444 0.2222 0.05556 0.1111 0 0.05556
## Detection Rate 0.4444 0.0000 0.00000 0.0000 0 0.00000
## Detection Prevalence 1.0000 0.0000 0.00000 0.0000 0 0.00000
## Balanced Accuracy 0.5000 0.5000 0.50000 0.5000 NA 0.50000
## Class: 7
## Sensitivity 0.0000
## Specificity 1.0000
## Pos Pred Value NaN
## Neg Pred Value 0.8889
## Prevalence 0.1111
## Detection Rate 0.0000
## Detection Prevalence 0.0000
## Balanced Accuracy 0.5000
Koefisien Diskriminan
# Coefficients of the linear discriminants (one column per discriminant)
lda_model$scaling
## LD1
## predator 2.067728
Proporsi Varians
# Proportion of trace: squared singular values normalised to sum to 1
lda_model$svd^2 / sum(lda_model$svd^2)
## [1] 1
Visualisasi LDA
# Discriminant scores of the training rows (predict() without new data)
lda_values <- predict(lda_model)$x
# Colour by the training labels: the scores have one row per training
# observation, so labels from the full data set would not line up with them.
plot(lda_values, col = train_clean$class_type, pch = 19)

# Training scores together with their class labels
lda_df <- data.frame(lda_values, class = train_clean$class_type)
# Density of the first discriminant, one curve per class
ggplot(data = lda_df, mapping = aes(x = LD1, fill = class)) +
  geom_density(alpha = 0.5)

Evaluasi Model LDA
# Evaluate on test_clean, the same predictor set the model was trained on,
# so this step is consistent with the evaluation right after model fitting.
lda_pred <- predict(lda_model, test_clean)
confusionMatrix(lda_pred$class, test_clean$class_type)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4 5 6 7
## 1 8 4 1 2 0 1 2
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## 7 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.4444
## 95% CI : (0.2153, 0.6924)
## No Information Rate : 0.4444
## P-Value [Acc > NIR] : 0.59
##
## Kappa : 0
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity 1.0000 0.0000 0.00000 0.0000 NA 0.00000
## Specificity 0.0000 1.0000 1.00000 1.0000 1 1.00000
## Pos Pred Value 0.4444 NaN NaN NaN NA NaN
## Neg Pred Value NaN 0.7778 0.94444 0.8889 NA 0.94444
## Prevalence 0.4444 0.2222 0.05556 0.1111 0 0.05556
## Detection Rate 0.4444 0.0000 0.00000 0.0000 0 0.00000
## Detection Prevalence 1.0000 0.0000 0.00000 0.0000 0 0.00000
## Balanced Accuracy 0.5000 0.5000 0.50000 0.5000 NA 0.50000
## Class: 7
## Sensitivity 0.0000
## Specificity 1.0000
## Pos Pred Value NaN
## Neg Pred Value 0.8889
## Prevalence 0.1111
## Detection Rate 0.0000
## Detection Prevalence 0.0000
## Balanced Accuracy 0.5000
Regresi Logistik Multinomial
Pembuatan Model
# Fit a multinomial logistic regression of class_type on all predictors
# NOTE(review): the recorded fit ends at a deviance of ~0 with very large
# coefficients, which looks like complete separation — confirm before
# interpreting coefficients or standard errors
log_model <- multinom(class_type ~ ., data = train)
## # weights: 126 (102 variable)
## initial value 161.510542
## iter 10 value 10.510024
## iter 20 value 0.049522
## iter 30 value 0.002035
## iter 40 value 0.000402
## final value 0.000063
## converged
# Coefficients, standard errors, residual deviance and AIC of the fitted model
summary(log_model)
## Call:
## multinom(formula = class_type ~ ., data = train)
##
## Coefficients:
## (Intercept) hair feathers eggs milk airborne aquatic
## 2 0.677605 -43.482378 128.491918 2.906694 -62.48411 -1.239885 -12.665958
## 3 10.214371 -96.134709 -100.783271 3.331750 -101.07737 -20.172078 -81.586405
## 4 -16.216777 -5.220667 9.809099 73.078459 -66.73755 17.486240 -7.764446
## 5 -67.353467 -62.517385 1.016034 24.874097 -69.51600 -2.172317 69.843020
## 6 -22.409084 -23.595257 9.161048 57.965589 -31.16388 59.273436 -41.262258
## 7 133.965267 -42.326759 5.872441 13.203903 -38.54315 -32.918423 7.550430
## predator toothed backbone breathes venomous fins legs
## 2 2.7789335 -1.248341 -4.308894 -24.23579 4.67938 16.02902 12.410356
## 3 0.1164044 17.062279 75.226423 -22.01818 57.70502 -79.89750 -8.541119
## 4 -10.1371057 13.133842 -1.359614 -86.74845 12.13821 52.78517 14.227331
## 5 -17.7242307 38.250581 21.592603 43.12856 -2.76416 -33.35685 11.270228
## 6 -24.6732173 -8.481207 -39.541544 30.30207 -31.20064 23.00812 8.101123
## 7 58.6232337 -13.733957 -106.435291 -24.98812 39.64470 2.55780 -11.358743
## tail domestic catsize
## 2 -24.800010 10.0481458 -7.9766998
## 3 75.996203 -11.6613150 -5.3049992
## 4 6.967211 2.9462901 2.4593420
## 5 -16.938656 -21.9696663 -37.8683501
## 6 -39.150588 -26.2358062 0.8227688
## 7 1.875908 -0.4791144 -47.0141065
##
## Std. Errors:
## (Intercept) hair feathers eggs milk airborne
## 2 1.921302e+03 1.037767e-07 1.921302e+03 1.921302e+03 4.222239e-11 1.921302e+03
## 3 6.609523e-03 5.623511e-18 6.699444e-20 3.772944e-12 5.623511e-18 1.982667e-26
## 4 7.200298e+02 1.151855e+03 4.530748e-11 7.200298e+02 5.505152e-11 1.151855e+03
## 5 3.833468e+03 4.696876e-10 3.833468e+03 3.833468e+03 4.696876e-10 3.833468e+03
## 6 3.658402e+03 1.151855e+03 4.793197e+03 3.658402e+03 3.627701e-12 3.658402e+03
## 7 4.860296e+02 5.156506e-19 3.196546e-07 4.761206e+02 1.497674e+01 8.893449e-12
## aquatic predator toothed backbone breathes venomous
## 2 3.833468e+03 2.384399e+04 6.567164e-21 1.921302e+03 1.921302e+03 1.132519e-12
## 3 6.609523e-03 6.609523e-03 6.609523e-03 6.609523e-03 9.264122e-11 6.609523e-03
## 4 4.761206e+02 4.761206e+02 2.316607e+02 2.316607e+02 1.151855e+03 1.151857e+03
## 5 3.833468e+03 2.384399e+04 5.622981e-10 3.833468e+03 3.833468e+03 1.902560e-25
## 6 1.227705e-09 1.745647e-11 3.627701e-12 4.793197e+03 3.658402e+03 1.151855e+03
## 7 4.860296e+02 4.860296e+02 2.249861e+02 2.249861e+02 1.497674e+01 4.998901e-03
## fins legs tail domestic catsize
## 2 1.009923e-38 3.842603e+03 1.921302e+03 1.301098e-15 2.415971e+04
## 3 9.639395e-11 3.705453e-10 6.609523e-03 1.870558e-30 9.639403e-11
## 4 2.316607e+02 3.692633e+03 2.316607e+02 1.151855e+03 1.610653e-03
## 5 4.906032e-17 7.666936e+03 3.833468e+03 1.560435e-18 2.736848e+04
## 6 9.914184e-47 2.941336e+03 4.793197e+03 1.151855e+03 4.781450e+03
## 7 2.249889e+02 4.162207e+03 2.249861e+02 4.837968e-20 1.497513e+01
##
## Residual Deviance: 0.0001250045
## AIC: 204.0001
Estimasi Parameter
# Coefficient matrix: one row per non-reference class, one column per term
coef(log_model)
## (Intercept) hair feathers eggs milk airborne aquatic
## 2 0.677605 -43.482378 128.491918 2.906694 -62.48411 -1.239885 -12.665958
## 3 10.214371 -96.134709 -100.783271 3.331750 -101.07737 -20.172078 -81.586405
## 4 -16.216777 -5.220667 9.809099 73.078459 -66.73755 17.486240 -7.764446
## 5 -67.353467 -62.517385 1.016034 24.874097 -69.51600 -2.172317 69.843020
## 6 -22.409084 -23.595257 9.161048 57.965589 -31.16388 59.273436 -41.262258
## 7 133.965267 -42.326759 5.872441 13.203903 -38.54315 -32.918423 7.550430
## predator toothed backbone breathes venomous fins legs
## 2 2.7789335 -1.248341 -4.308894 -24.23579 4.67938 16.02902 12.410356
## 3 0.1164044 17.062279 75.226423 -22.01818 57.70502 -79.89750 -8.541119
## 4 -10.1371057 13.133842 -1.359614 -86.74845 12.13821 52.78517 14.227331
## 5 -17.7242307 38.250581 21.592603 43.12856 -2.76416 -33.35685 11.270228
## 6 -24.6732173 -8.481207 -39.541544 30.30207 -31.20064 23.00812 8.101123
## 7 58.6232337 -13.733957 -106.435291 -24.98812 39.64470 2.55780 -11.358743
## tail domestic catsize
## 2 -24.800010 10.0481458 -7.9766998
## 3 75.996203 -11.6613150 -5.3049992
## 4 6.967211 2.9462901 2.4593420
## 5 -16.938656 -21.9696663 -37.8683501
## 6 -39.150588 -26.2358062 0.8227688
## 7 1.875908 -0.4791144 -47.0141065
Uji Signifikansi
# Wald z-tests for the multinomial coefficients.
# The summary is built once and reused instead of being rebuilt per term.
log_summary <- summary(log_model)
z <- log_summary$coefficients / log_summary$standard.errors
# Two-sided p-value taken from the upper tail directly; 1 - pnorm(|z|)
# rounds to exactly 0 for large |z|.
p_value <- 2 * pnorm(abs(z), lower.tail = FALSE)
p_value
## (Intercept) hair feathers eggs milk airborne aquatic
## 2 0.9997186 0.0000000 0.9466792 0.9987929 0.00000000 0.9994851 0.9973638
## 3 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.0000000 0.0000000
## 4 0.9820313 0.9963837 0.0000000 0.9191586 0.00000000 0.9878878 0.9869889
## 5 0.9859820 0.0000000 0.9997885 0.9948228 0.00000000 0.9995479 0.9854639
## 6 0.9951127 0.9836568 0.9984750 0.9873584 0.00000000 0.9870732 0.0000000
## 7 0.7828308 0.0000000 0.0000000 0.9778757 0.01006658 0.0000000 0.9876054
## predator toothed backbone breathes venomous fins legs
## 2 0.9999070 0.0000000 0.9982106 0.98993555 0.0000000 0.0000000 0.9974231
## 3 0.0000000 0.0000000 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## 4 0.9830135 0.9547887 0.9953173 0.93996653 0.9915921 0.8197586 0.9969258
## 5 0.9994069 0.0000000 0.9955058 0.99102356 0.0000000 0.0000000 0.9988271
## 6 0.0000000 0.0000000 0.9934179 0.99339130 0.9783901 0.0000000 0.9978024
## 7 0.9039947 0.9513245 0.6361597 0.09522409 0.0000000 0.9909294 0.9978226
## tail domestic catsize
## 2 0.9897013 0.0000000 0.999736566
## 3 0.0000000 0.0000000 0.000000000
## 4 0.9760072 0.9979591 0.000000000
## 5 0.9964745 0.0000000 0.998896009
## 6 0.9934830 0.9818282 0.999862704
## 7 0.9933474 0.0000000 0.001692483
Odds Ratio
# Exponentiated coefficients (odds ratios relative to the reference class)
exp(coef(log_model))
## (Intercept) hair feathers eggs milk airborne
## 2 1.969156e+00 1.305699e-19 6.358154e+55 1.829621e+01 7.302887e-28 2.894174e-01
## 3 2.729259e+04 1.775114e-42 1.699736e-44 2.798727e+01 1.266645e-44 1.735313e-09
## 4 9.060313e-08 5.403726e-03 1.819858e+04 5.464767e+31 1.038124e-29 3.928054e+07
## 5 5.607391e-30 7.063903e-28 2.762219e+00 6.348675e+10 6.450373e-31 1.139134e-01
## 6 1.852928e-10 5.658613e-11 9.519032e+03 1.493260e+25 2.922122e-14 5.522381e+25
## 7 1.514873e+58 4.146888e-19 3.551149e+02 5.424780e+05 1.823580e-17 5.054875e-15
## aquatic predator toothed backbone breathes venomous
## 2 3.156781e-06 1.610184e+01 2.869804e-01 1.344841e-02 2.982158e-11 1.077032e+02
## 3 3.693810e-36 1.123450e+00 2.570713e+07 4.681881e+32 2.739204e-10 1.150723e+25
## 4 4.245648e-04 3.958320e-05 5.057722e+05 2.567598e-01 2.116544e-38 1.868769e+05
## 5 2.149998e+30 2.006617e-08 4.092759e+16 2.385330e+09 5.376435e+18 6.302901e-02
## 6 1.202345e-18 1.925564e-11 2.073282e-04 6.719328e-18 1.445510e+13 2.816654e-14
## 7 1.901561e+03 2.882351e+25 1.084971e-06 5.966786e-47 1.405396e-11 1.649960e+17
## fins legs tail domestic catsize
## 2 9.147747e+06 2.453292e+05 1.696261e-11 2.311289e+04 3.433707e-04
## 3 1.999664e-35 1.952717e-04 1.010954e+33 8.620952e-06 4.966702e-03
## 4 8.400595e+22 1.509564e+06 1.061258e+03 1.903520e+01 1.169711e+01
## 5 3.260657e-15 7.845091e+04 4.401851e-08 2.875380e-10 3.580837e-17
## 6 9.824226e+09 3.298169e+03 9.933804e-18 4.035842e-12 2.276795e+00
## 7 1.290739e+01 1.166704e-05 6.526745e+00 6.193316e-01 3.819733e-21
Evaluasi Model
# Predicted classes for the held-out rows
log_pred <- predict(log_model, newdata = test)
# Compare predicted and true classes
confusionMatrix(data = log_pred, reference = test$class_type)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4 5 6 7
## 1 8 0 0 0 0 0 0
## 2 0 3 0 0 0 0 0
## 3 0 0 1 0 0 0 0
## 4 0 0 0 2 0 0 1
## 5 0 1 0 0 0 0 0
## 6 0 0 0 0 0 1 0
## 7 0 0 0 0 0 0 1
##
## Overall Statistics
##
## Accuracy : 0.8889
## 95% CI : (0.6529, 0.9862)
## No Information Rate : 0.4444
## P-Value [Acc > NIR] : 0.0001202
##
## Kappa : 0.8487
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity 1.0000 0.7500 1.00000 1.0000 NA 1.00000
## Specificity 1.0000 1.0000 1.00000 0.9375 0.94444 1.00000
## Pos Pred Value 1.0000 1.0000 1.00000 0.6667 NA 1.00000
## Neg Pred Value 1.0000 0.9333 1.00000 1.0000 NA 1.00000
## Prevalence 0.4444 0.2222 0.05556 0.1111 0.00000 0.05556
## Detection Rate 0.4444 0.1667 0.05556 0.1111 0.00000 0.05556
## Detection Prevalence 0.4444 0.1667 0.05556 0.1667 0.05556 0.05556
## Balanced Accuracy 1.0000 0.8750 1.00000 0.9688 NA 1.00000
## Class: 7
## Sensitivity 0.50000
## Specificity 1.00000
## Pos Pred Value 1.00000
## Neg Pred Value 0.94118
## Prevalence 0.11111
## Detection Rate 0.05556
## Detection Prevalence 0.05556
## Balanced Accuracy 0.75000
Perbandingan Model
Confusion Matrix Heatmap
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Confusion matrix of the multinomial model on the test set
cm <- confusionMatrix(data = log_pred, reference = test$class_type)
# Long format: one row per (Prediction, Reference) cell with its count
cm_table <- as.data.frame(cm$table)
ggplot(
  data = cm_table,
  mapping = aes(x = Prediction, y = Reference, fill = Freq)
) +
  geom_tile() +
  geom_text(mapping = aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "blue")

Interpretasi & Insight
Feature Importance
# Absolute multinomial coefficients as a rough importance measure
importance <- abs(coef(log_model))
# The intercept is not a feature, so it is left out of the chart
importance <- importance[, colnames(importance) != "(Intercept)", drop = FALSE]
importance_df <- as.data.frame(importance)
importance_df %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(x = name, y = value)) +
  # geom_col() stacks the per-class values, so each bar is the sum over classes
  geom_col() +
  coord_flip()
