Library & Load Dataset

Load Library

# Library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(MVN)
library(biotools)
## ---
## biotools version 4.3
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(nnet)
library(corrplot)
## corrplot 0.95 loaded

Load Dataset

# Read the zoo data set; the relative path is resolved against the working directory
df <- read.csv("zoo.csv")

# Store the target as a factor so it is treated as a categorical label
df[["class_type"]] <- as.factor(df[["class_type"]])

# Remove the animal name column (dplyr:: is spelled out because MASS masks select())
df <- dplyr::select(df, -animal_name)

head(df)
##   hair feathers eggs milk airborne aquatic predator toothed backbone breathes
## 1    1        0    0    1        0       0        1       1        1        1
## 2    1        0    0    1        0       0        0       1        1        1
## 3    0        0    1    0        0       1        1       1        1        0
## 4    1        0    0    1        0       0        1       1        1        1
## 5    1        0    0    1        0       0        1       1        1        1
## 6    1        0    0    1        0       0        0       1        1        1
##   venomous fins legs tail domestic catsize class_type
## 1        0    0    4    0        0       1          1
## 2        0    0    4    1        0       1          1
## 3        0    1    0    1        0       0          4
## 4        0    0    4    0        0       1          1
## 5        0    0    4    1        0       1          1
## 6        0    0    4    1        0       1          1

Data Understanding

Dimensi Data

dim(df)
## [1] 101  17

Struktur Data

str(df)
## 'data.frame':    101 obs. of  17 variables:
##  $ hair      : int  1 1 0 1 1 1 1 0 0 1 ...
##  $ feathers  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ eggs      : int  0 0 1 0 0 0 0 1 1 0 ...
##  $ milk      : int  1 1 0 1 1 1 1 0 0 1 ...
##  $ airborne  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ aquatic   : int  0 0 1 0 0 0 0 1 1 0 ...
##  $ predator  : int  1 0 1 1 1 0 0 0 1 0 ...
##  $ toothed   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ backbone  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ breathes  : int  1 1 0 1 1 1 1 0 0 1 ...
##  $ venomous  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fins      : int  0 0 1 0 0 0 0 1 1 0 ...
##  $ legs      : int  4 4 0 4 4 4 4 0 0 4 ...
##  $ tail      : int  0 1 1 0 1 1 1 1 1 0 ...
##  $ domestic  : int  0 0 0 0 0 0 1 1 0 1 ...
##  $ catsize   : int  1 1 0 1 1 1 1 0 0 0 ...
##  $ class_type: Factor w/ 7 levels "1","2","3","4",..: 1 1 4 1 1 1 1 4 4 1 ...

Ringkasan data

summary(df)
##       hair           feathers          eggs             milk       
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Median :1.0000   Median :0.0000  
##  Mean   :0.4257   Mean   :0.198   Mean   :0.5842   Mean   :0.4059  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.000   Max.   :1.0000   Max.   :1.0000  
##                                                                    
##     airborne         aquatic          predator         toothed     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.0000   Median :0.0000   Median :1.0000   Median :1.000  
##  Mean   :0.2376   Mean   :0.3564   Mean   :0.5545   Mean   :0.604  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000  
##                                                                    
##     backbone         breathes         venomous            fins       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:1.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :1.0000   Median :1.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.8218   Mean   :0.7921   Mean   :0.07921   Mean   :0.1683  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##                                                                      
##       legs            tail           domestic         catsize       class_type
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   1:41      
##  1st Qu.:2.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   2:20      
##  Median :4.000   Median :1.0000   Median :0.0000   Median :0.0000   3: 5      
##  Mean   :2.842   Mean   :0.7426   Mean   :0.1287   Mean   :0.4356   4:13      
##  3rd Qu.:4.000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000   5: 4      
##  Max.   :8.000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   6: 8      
##                                                                     7:10

Distribusi Target

table(df$class_type)
## 
##  1  2  3  4  5  6  7 
## 41 20  5 13  4  8 10

Data Preprocessing

Ambil Variabel Numerik

# Keep only the numeric columns; the factor target class_type is left out
df_num <- dplyr::select(df, where(is.numeric))

Cek Missing Value

# Count the missing values in each column
vapply(df, function(column) sum(is.na(column)), numeric(1))
##       hair   feathers       eggs       milk   airborne    aquatic   predator 
##          0          0          0          0          0          0          0 
##    toothed   backbone   breathes   venomous       fins       legs       tail 
##          0          0          0          0          0          0          0 
##   domestic    catsize class_type 
##          0          0          0

Cek Duplikasi

# Number of rows that exactly repeat an earlier row
length(which(duplicated(df)))
## [1] 42

Exploratory Data Analysis (EDA)

Distribusi Class & Korelasi Variabel

# Bar chart of the number of observations in each class
ggplot(data = df, mapping = aes(x = class_type)) +
  geom_bar() +
  labs(title = "Distribusi Class Type")

# Pairwise correlations between the numeric columns only
df_num <- dplyr::select(df, where(is.numeric))

# Called through its namespace, so corrplot does not need to be attached again here
corrplot::corrplot(cor(df_num), method = "color")

Boxplot Per Variabel

# Reshape to long format (one row per observation/variable pair), then draw
# one box plot panel per variable with the classes on the x axis
ggplot(
  data = pivot_longer(
    df,
    cols = -class_type,
    names_to = "name",
    values_to = "value"
  ),
  mapping = aes(x = class_type, y = value)
) +
  geom_boxplot() +
  facet_wrap(vars(name), scales = "free")

Statistika Deskriptif

Summary Statistik

summary(df)
##       hair           feathers          eggs             milk       
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Median :1.0000   Median :0.0000  
##  Mean   :0.4257   Mean   :0.198   Mean   :0.5842   Mean   :0.4059  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.000   Max.   :1.0000   Max.   :1.0000  
##                                                                    
##     airborne         aquatic          predator         toothed     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.0000   Median :0.0000   Median :1.0000   Median :1.000  
##  Mean   :0.2376   Mean   :0.3564   Mean   :0.5545   Mean   :0.604  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000  
##                                                                    
##     backbone         breathes         venomous            fins       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:1.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :1.0000   Median :1.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.8218   Mean   :0.7921   Mean   :0.07921   Mean   :0.1683  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##                                                                      
##       legs            tail           domestic         catsize       class_type
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   1:41      
##  1st Qu.:2.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   2:20      
##  Median :4.000   Median :1.0000   Median :0.0000   Median :0.0000   3: 5      
##  Mean   :2.842   Mean   :0.7426   Mean   :0.1287   Mean   :0.4356   4:13      
##  3rd Qu.:4.000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000   5: 4      
##  Max.   :8.000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   6: 8      
##                                                                     7:10

Uji Asumsi LDA

Normalitas Multivariat

args(MVN::mvn)
## function (data, subset = NULL, mvn_test = "hz", use_population = TRUE, 
##     tol = 1e-25, alpha = 0.05, outlier_seed = NULL, scale = FALSE, 
##     descriptives = TRUE, transform = "none", impute = "none", 
##     bootstrap = FALSE, B = 1000, cores = 1, univariate_test = "AD", 
##     multivariate_outlier_method = "none", power_family = "none", 
##     power_transform_type = "optimal", show_new_data = FALSE, 
##     tidy = TRUE) 
## NULL
# Run the multivariate normality test on the numeric predictors, using the
# function's default test (mvn_test = "hz")
mvn_result <- MVN::mvn(data = df_num)

# The installed MVN exposes a snake_case API (mvn_test, univariate_test, ...),
# and its result elements follow the same naming. The old camelCase element
# `multivariateNormality` does not exist on this object and returned NULL.
mvn_result$multivariate_normality
## NULL

Homogenitas Kovarians

# Box's M test for equality of the class covariance matrices.
# NOTE(review): in the recorded run this call warns that some classes have
# fewer observations than variables and returns Chi-Sq = NaN with p-value = NA,
# so it gives no usable evidence about covariance homogeneity for this data.
boxM(df_num, df$class_type)
## Warning in boxM(df_num, df$class_type): there are one or more levels with less
## observations than variables!
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  df_num
## Chi-Sq (approx.) = NaN, df = 816, p-value = NA
table(df$class_type)
## 
##  1  2  3  4  5  6  7 
## 41 20  5 13  4  8 10

Split Data

Train-Test Split

# Fix the random seed so the partition can be reproduced
set.seed(123)

# Sample 80% of the row indices based on the class label; list = FALSE
# returns the indices as a matrix instead of a list
trainIndex <- createDataPartition(y = df$class_type, p = 0.8, list = FALSE)

# The sampled rows form the training set, all remaining rows the test set
train <- df[trainIndex, ]
test <- df[-trainIndex, ]

Linear Discriminant Analysis (LDA)

Pembuatan Model

# Select the predictors that can be used in an LDA fit.
#
# A predictor is dropped only when it is constant inside every class, i.e. it
# has no within-group variation at all; that is the condition under which
# MASS::lda() stops with "variables appear to be constant within groups".
# The previous rule required variation inside *every* class, which discarded
# almost all binary predictors.
#
# Arguments:
#   df     - data frame holding the predictors and the target column.
#   target - name of the target (grouping) column, as a string.
#
# Returns a character vector with the names of the predictors to keep, in the
# column order of `df`.
remove_constant <- function(df, target) {
  predictors <- setdiff(names(df), target)

  # TRUE when the column takes more than one value in at least one class
  varies_within_some_group <- function(column) {
    # drop = TRUE ignores factor levels that have no rows
    per_group <- split(df[[column]], df[[target]], drop = TRUE)
    any(vapply(per_group, function(v) length(unique(v)) > 1L, logical(1)))
  }

  keep <- vapply(predictors, varies_within_some_group, logical(1))
  predictors[keep]
}

# Predictor names returned by remove_constant() for the training data
cols_keep <- remove_constant(train, "class_type")

# Restrict both sets to those predictors plus the target, so the model is
# fitted and evaluated on the same columns
train_clean <- dplyr::select(train, dplyr::all_of(cols_keep), class_type)
test_clean  <- dplyr::select(test, dplyr::all_of(cols_keep), class_type)

Evaluasi Model LDA

# Fit the LDA classifier on the filtered training data, using every remaining
# column as a predictor of class_type
lda_model <- lda(class_type ~ ., data = train_clean)

# Classify the held-out rows
lda_pred <- predict(lda_model, newdata = test_clean)

# Cross-tabulate predicted against true classes and report the caret metrics
confusionMatrix(data = lda_pred$class, reference = test_clean$class_type)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction 1 2 3 4 5 6 7
##          1 8 4 1 2 0 1 2
##          2 0 0 0 0 0 0 0
##          3 0 0 0 0 0 0 0
##          4 0 0 0 0 0 0 0
##          5 0 0 0 0 0 0 0
##          6 0 0 0 0 0 0 0
##          7 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4444          
##                  95% CI : (0.2153, 0.6924)
##     No Information Rate : 0.4444          
##     P-Value [Acc > NIR] : 0.59            
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity            1.0000   0.0000  0.00000   0.0000       NA  0.00000
## Specificity            0.0000   1.0000  1.00000   1.0000        1  1.00000
## Pos Pred Value         0.4444      NaN      NaN      NaN       NA      NaN
## Neg Pred Value            NaN   0.7778  0.94444   0.8889       NA  0.94444
## Prevalence             0.4444   0.2222  0.05556   0.1111        0  0.05556
## Detection Rate         0.4444   0.0000  0.00000   0.0000        0  0.00000
## Detection Prevalence   1.0000   0.0000  0.00000   0.0000        0  0.00000
## Balanced Accuracy      0.5000   0.5000  0.50000   0.5000       NA  0.50000
##                      Class: 7
## Sensitivity            0.0000
## Specificity            1.0000
## Pos Pred Value            NaN
## Neg Pred Value         0.8889
## Prevalence             0.1111
## Detection Rate         0.0000
## Detection Prevalence   0.0000
## Balanced Accuracy      0.5000

Koefisien Diskriminan

lda_model$scaling
##               LD1
## predator 2.067728

Proporsi Varians

# Squared singular values of the discriminants, expressed as shares of their total
prop.table(lda_model$svd^2)
## [1] 1

Visualisasi LDA

# Discriminant scores; predict() is called without newdata, so the scores
# belong to the rows the model was fitted on (train_clean)
lda_values <- predict(lda_model)$x

# Colour the points with the training labels. df$class_type covers every row
# of the full data set, so it does not line up with the training-only scores.
plot(lda_values, col = train_clean$class_type, pch = 19)

# Pair each score with its class label for ggplot
lda_df <- data.frame(lda_values, class = train_clean$class_type)

# Density of the first discriminant, one semi-transparent curve per class
ggplot(lda_df, aes(LD1, fill = class)) +
  geom_density(alpha = 0.5)

Evaluasi Model LDA

# Repeat the evaluation on the unfiltered test set. In the recorded run this
# prints the same confusion matrix as the evaluation on test_clean.
lda_pred <- predict(lda_model, newdata = test)

confusionMatrix(data = lda_pred$class, reference = test$class_type)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction 1 2 3 4 5 6 7
##          1 8 4 1 2 0 1 2
##          2 0 0 0 0 0 0 0
##          3 0 0 0 0 0 0 0
##          4 0 0 0 0 0 0 0
##          5 0 0 0 0 0 0 0
##          6 0 0 0 0 0 0 0
##          7 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4444          
##                  95% CI : (0.2153, 0.6924)
##     No Information Rate : 0.4444          
##     P-Value [Acc > NIR] : 0.59            
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity            1.0000   0.0000  0.00000   0.0000       NA  0.00000
## Specificity            0.0000   1.0000  1.00000   1.0000        1  1.00000
## Pos Pred Value         0.4444      NaN      NaN      NaN       NA      NaN
## Neg Pred Value            NaN   0.7778  0.94444   0.8889       NA  0.94444
## Prevalence             0.4444   0.2222  0.05556   0.1111        0  0.05556
## Detection Rate         0.4444   0.0000  0.00000   0.0000        0  0.00000
## Detection Prevalence   1.0000   0.0000  0.00000   0.0000        0  0.00000
## Balanced Accuracy      0.5000   0.5000  0.50000   0.5000       NA  0.50000
##                      Class: 7
## Sensitivity            0.0000
## Specificity            1.0000
## Pos Pred Value            NaN
## Neg Pred Value         0.8889
## Prevalence             0.1111
## Detection Rate         0.0000
## Detection Prevalence   0.0000
## Balanced Accuracy      0.5000

Regresi Logistik Multinomial

Pembuatan Model

# Fit a multinomial logistic regression of class_type on all other columns of
# the training set.
# NOTE(review): the recorded fit ends with a loss of about 6e-05 and very large
# coefficients, which suggests the classes are completely separated in the
# training data. If so, the standard errors and Wald p-values derived from this
# model are not reliable; consider a penalised fit (e.g. the `decay` argument).
log_model <- multinom(class_type ~ ., data = train)
## # weights:  126 (102 variable)
## initial  value 161.510542 
## iter  10 value 10.510024
## iter  20 value 0.049522
## iter  30 value 0.002035
## iter  40 value 0.000402
## final  value 0.000063 
## converged
summary(log_model)
## Call:
## multinom(formula = class_type ~ ., data = train)
## 
## Coefficients:
##   (Intercept)       hair    feathers      eggs       milk   airborne    aquatic
## 2    0.677605 -43.482378  128.491918  2.906694  -62.48411  -1.239885 -12.665958
## 3   10.214371 -96.134709 -100.783271  3.331750 -101.07737 -20.172078 -81.586405
## 4  -16.216777  -5.220667    9.809099 73.078459  -66.73755  17.486240  -7.764446
## 5  -67.353467 -62.517385    1.016034 24.874097  -69.51600  -2.172317  69.843020
## 6  -22.409084 -23.595257    9.161048 57.965589  -31.16388  59.273436 -41.262258
## 7  133.965267 -42.326759    5.872441 13.203903  -38.54315 -32.918423   7.550430
##      predator    toothed    backbone  breathes  venomous      fins       legs
## 2   2.7789335  -1.248341   -4.308894 -24.23579   4.67938  16.02902  12.410356
## 3   0.1164044  17.062279   75.226423 -22.01818  57.70502 -79.89750  -8.541119
## 4 -10.1371057  13.133842   -1.359614 -86.74845  12.13821  52.78517  14.227331
## 5 -17.7242307  38.250581   21.592603  43.12856  -2.76416 -33.35685  11.270228
## 6 -24.6732173  -8.481207  -39.541544  30.30207 -31.20064  23.00812   8.101123
## 7  58.6232337 -13.733957 -106.435291 -24.98812  39.64470   2.55780 -11.358743
##         tail    domestic     catsize
## 2 -24.800010  10.0481458  -7.9766998
## 3  75.996203 -11.6613150  -5.3049992
## 4   6.967211   2.9462901   2.4593420
## 5 -16.938656 -21.9696663 -37.8683501
## 6 -39.150588 -26.2358062   0.8227688
## 7   1.875908  -0.4791144 -47.0141065
## 
## Std. Errors:
##    (Intercept)         hair     feathers         eggs         milk     airborne
## 2 1.921302e+03 1.037767e-07 1.921302e+03 1.921302e+03 4.222239e-11 1.921302e+03
## 3 6.609523e-03 5.623511e-18 6.699444e-20 3.772944e-12 5.623511e-18 1.982667e-26
## 4 7.200298e+02 1.151855e+03 4.530748e-11 7.200298e+02 5.505152e-11 1.151855e+03
## 5 3.833468e+03 4.696876e-10 3.833468e+03 3.833468e+03 4.696876e-10 3.833468e+03
## 6 3.658402e+03 1.151855e+03 4.793197e+03 3.658402e+03 3.627701e-12 3.658402e+03
## 7 4.860296e+02 5.156506e-19 3.196546e-07 4.761206e+02 1.497674e+01 8.893449e-12
##        aquatic     predator      toothed     backbone     breathes     venomous
## 2 3.833468e+03 2.384399e+04 6.567164e-21 1.921302e+03 1.921302e+03 1.132519e-12
## 3 6.609523e-03 6.609523e-03 6.609523e-03 6.609523e-03 9.264122e-11 6.609523e-03
## 4 4.761206e+02 4.761206e+02 2.316607e+02 2.316607e+02 1.151855e+03 1.151857e+03
## 5 3.833468e+03 2.384399e+04 5.622981e-10 3.833468e+03 3.833468e+03 1.902560e-25
## 6 1.227705e-09 1.745647e-11 3.627701e-12 4.793197e+03 3.658402e+03 1.151855e+03
## 7 4.860296e+02 4.860296e+02 2.249861e+02 2.249861e+02 1.497674e+01 4.998901e-03
##           fins         legs         tail     domestic      catsize
## 2 1.009923e-38 3.842603e+03 1.921302e+03 1.301098e-15 2.415971e+04
## 3 9.639395e-11 3.705453e-10 6.609523e-03 1.870558e-30 9.639403e-11
## 4 2.316607e+02 3.692633e+03 2.316607e+02 1.151855e+03 1.610653e-03
## 5 4.906032e-17 7.666936e+03 3.833468e+03 1.560435e-18 2.736848e+04
## 6 9.914184e-47 2.941336e+03 4.793197e+03 1.151855e+03 4.781450e+03
## 7 2.249889e+02 4.162207e+03 2.249861e+02 4.837968e-20 1.497513e+01
## 
## Residual Deviance: 0.0001250045 
## AIC: 204.0001

Estimasi Parameter

coef(log_model)
##   (Intercept)       hair    feathers      eggs       milk   airborne    aquatic
## 2    0.677605 -43.482378  128.491918  2.906694  -62.48411  -1.239885 -12.665958
## 3   10.214371 -96.134709 -100.783271  3.331750 -101.07737 -20.172078 -81.586405
## 4  -16.216777  -5.220667    9.809099 73.078459  -66.73755  17.486240  -7.764446
## 5  -67.353467 -62.517385    1.016034 24.874097  -69.51600  -2.172317  69.843020
## 6  -22.409084 -23.595257    9.161048 57.965589  -31.16388  59.273436 -41.262258
## 7  133.965267 -42.326759    5.872441 13.203903  -38.54315 -32.918423   7.550430
##      predator    toothed    backbone  breathes  venomous      fins       legs
## 2   2.7789335  -1.248341   -4.308894 -24.23579   4.67938  16.02902  12.410356
## 3   0.1164044  17.062279   75.226423 -22.01818  57.70502 -79.89750  -8.541119
## 4 -10.1371057  13.133842   -1.359614 -86.74845  12.13821  52.78517  14.227331
## 5 -17.7242307  38.250581   21.592603  43.12856  -2.76416 -33.35685  11.270228
## 6 -24.6732173  -8.481207  -39.541544  30.30207 -31.20064  23.00812   8.101123
## 7  58.6232337 -13.733957 -106.435291 -24.98812  39.64470   2.55780 -11.358743
##         tail    domestic     catsize
## 2 -24.800010  10.0481458  -7.9766998
## 3  75.996203 -11.6613150  -5.3049992
## 4   6.967211   2.9462901   2.4593420
## 5 -16.938656 -21.9696663 -37.8683501
## 6 -39.150588 -26.2358062   0.8227688
## 7   1.875908  -0.4791144 -47.0141065

Uji Signifikansi

# Wald z statistics: each coefficient divided by its standard error.
# summary() is evaluated once and reused for both components.
log_summary <- summary(log_model)
z <- log_summary$coefficients / log_summary$standard.errors

# Two-sided p-values taken from the upper tail directly. Computing
# 1 - pnorm(|z|) loses all precision for large |z| and rounds to exactly 0.
p_value <- 2 * pnorm(abs(z), lower.tail = FALSE)

p_value
##   (Intercept)      hair  feathers      eggs       milk  airborne   aquatic
## 2   0.9997186 0.0000000 0.9466792 0.9987929 0.00000000 0.9994851 0.9973638
## 3   0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.0000000 0.0000000
## 4   0.9820313 0.9963837 0.0000000 0.9191586 0.00000000 0.9878878 0.9869889
## 5   0.9859820 0.0000000 0.9997885 0.9948228 0.00000000 0.9995479 0.9854639
## 6   0.9951127 0.9836568 0.9984750 0.9873584 0.00000000 0.9870732 0.0000000
## 7   0.7828308 0.0000000 0.0000000 0.9778757 0.01006658 0.0000000 0.9876054
##    predator   toothed  backbone   breathes  venomous      fins      legs
## 2 0.9999070 0.0000000 0.9982106 0.98993555 0.0000000 0.0000000 0.9974231
## 3 0.0000000 0.0000000 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000
## 4 0.9830135 0.9547887 0.9953173 0.93996653 0.9915921 0.8197586 0.9969258
## 5 0.9994069 0.0000000 0.9955058 0.99102356 0.0000000 0.0000000 0.9988271
## 6 0.0000000 0.0000000 0.9934179 0.99339130 0.9783901 0.0000000 0.9978024
## 7 0.9039947 0.9513245 0.6361597 0.09522409 0.0000000 0.9909294 0.9978226
##        tail  domestic     catsize
## 2 0.9897013 0.0000000 0.999736566
## 3 0.0000000 0.0000000 0.000000000
## 4 0.9760072 0.9979591 0.000000000
## 5 0.9964745 0.0000000 0.998896009
## 6 0.9934830 0.9818282 0.999862704
## 7 0.9933474 0.0000000 0.001692483

Odds Ratio

exp(coef(log_model))
##    (Intercept)         hair     feathers         eggs         milk     airborne
## 2 1.969156e+00 1.305699e-19 6.358154e+55 1.829621e+01 7.302887e-28 2.894174e-01
## 3 2.729259e+04 1.775114e-42 1.699736e-44 2.798727e+01 1.266645e-44 1.735313e-09
## 4 9.060313e-08 5.403726e-03 1.819858e+04 5.464767e+31 1.038124e-29 3.928054e+07
## 5 5.607391e-30 7.063903e-28 2.762219e+00 6.348675e+10 6.450373e-31 1.139134e-01
## 6 1.852928e-10 5.658613e-11 9.519032e+03 1.493260e+25 2.922122e-14 5.522381e+25
## 7 1.514873e+58 4.146888e-19 3.551149e+02 5.424780e+05 1.823580e-17 5.054875e-15
##        aquatic     predator      toothed     backbone     breathes     venomous
## 2 3.156781e-06 1.610184e+01 2.869804e-01 1.344841e-02 2.982158e-11 1.077032e+02
## 3 3.693810e-36 1.123450e+00 2.570713e+07 4.681881e+32 2.739204e-10 1.150723e+25
## 4 4.245648e-04 3.958320e-05 5.057722e+05 2.567598e-01 2.116544e-38 1.868769e+05
## 5 2.149998e+30 2.006617e-08 4.092759e+16 2.385330e+09 5.376435e+18 6.302901e-02
## 6 1.202345e-18 1.925564e-11 2.073282e-04 6.719328e-18 1.445510e+13 2.816654e-14
## 7 1.901561e+03 2.882351e+25 1.084971e-06 5.966786e-47 1.405396e-11 1.649960e+17
##           fins         legs         tail     domestic      catsize
## 2 9.147747e+06 2.453292e+05 1.696261e-11 2.311289e+04 3.433707e-04
## 3 1.999664e-35 1.952717e-04 1.010954e+33 8.620952e-06 4.966702e-03
## 4 8.400595e+22 1.509564e+06 1.061258e+03 1.903520e+01 1.169711e+01
## 5 3.260657e-15 7.845091e+04 4.401851e-08 2.875380e-10 3.580837e-17
## 6 9.824226e+09 3.298169e+03 9.933804e-18 4.035842e-12 2.276795e+00
## 7 1.290739e+01 1.166704e-05 6.526745e+00 6.193316e-01 3.819733e-21

Evaluasi Model

# Predicted class for every row of the test set
log_pred <- predict(log_model, newdata = test)

# Cross-tabulate predicted against true classes and report the caret metrics
confusionMatrix(data = log_pred, reference = test$class_type)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction 1 2 3 4 5 6 7
##          1 8 0 0 0 0 0 0
##          2 0 3 0 0 0 0 0
##          3 0 0 1 0 0 0 0
##          4 0 0 0 2 0 0 1
##          5 0 1 0 0 0 0 0
##          6 0 0 0 0 0 1 0
##          7 0 0 0 0 0 0 1
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8889          
##                  95% CI : (0.6529, 0.9862)
##     No Information Rate : 0.4444          
##     P-Value [Acc > NIR] : 0.0001202       
##                                           
##                   Kappa : 0.8487          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity            1.0000   0.7500  1.00000   1.0000       NA  1.00000
## Specificity            1.0000   1.0000  1.00000   0.9375  0.94444  1.00000
## Pos Pred Value         1.0000   1.0000  1.00000   0.6667       NA  1.00000
## Neg Pred Value         1.0000   0.9333  1.00000   1.0000       NA  1.00000
## Prevalence             0.4444   0.2222  0.05556   0.1111  0.00000  0.05556
## Detection Rate         0.4444   0.1667  0.05556   0.1111  0.00000  0.05556
## Detection Prevalence   0.4444   0.1667  0.05556   0.1667  0.05556  0.05556
## Balanced Accuracy      1.0000   0.8750  1.00000   0.9688       NA  1.00000
##                      Class: 7
## Sensitivity           0.50000
## Specificity           1.00000
## Pos Pred Value        1.00000
## Neg Pred Value        0.94118
## Prevalence            0.11111
## Detection Rate        0.05556
## Detection Prevalence  0.05556
## Balanced Accuracy     0.75000

Perbandingan Model

Confusion Matrix Heatmap

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Confusion matrix of the multinomial model on the test set
cm <- confusionMatrix(data = log_pred, reference = test$class_type)

# Long format: one row per (Prediction, Reference) pair with its count in Freq
cm_table <- as.data.frame(cm$table)

# Heatmap with the cell counts printed on top of the tiles
ggplot(
  data = cm_table,
  mapping = aes(x = Prediction, y = Reference, fill = Freq)
) +
  geom_tile() +
  geom_text(mapping = aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "blue")

Interpretasi & Insight

Feature Importance

# Absolute multinomial coefficients: one row per non-reference class, one
# column per model term
importance <- abs(coef(log_model))

# The intercept is not a feature, so leave it out of the ranking
importance <- importance[, colnames(importance) != "(Intercept)", drop = FALSE]
importance_df <- as.data.frame(importance)

# Each bar stacks a feature's absolute coefficients over the classes, so its
# length is the sum across classes; features are ordered by that sum
importance_df %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(x = reorder(name, value, FUN = sum), y = value)) +
  geom_col() +
  coord_flip() +
  labs(x = "name")