TPG-Latihan-Prak-11

Library

library(cluster)
library(factoextra)
library(ggplot2)
library(tidyr)
library(scales)
library(readxl)
library(RColorBrewer)
library(clusterCrit)
library(rattle)
library(MASS)
library(dplyr) 
library(HDclassif)

Import Data

data <- read_excel("D:\\Github\\STA1342-TPG\\Data_prak11.xlsx")
data <- data %>% mutate(Status = as.factor(Status))
head(data)
## # A tibble: 6 × 4
##   tanggungan pendapatan  Usia Status
##        <dbl>      <dbl> <dbl> <fct> 
## 1          6       10      45 Bad   
## 2          1        9      55 Bad   
## 3          2        4.8    41 Bad   
## 4          0        4.9    58 Bad   
## 5          1        4.7    40 Bad   
## 6          3        2.4    50 Bad

Pisahkan Data per Kelompok

bad <- subset(data, Status=="Bad")[,1:3]
good <- subset(data, Status=="Good")[,1:3]

Hitung Mean per Kelompok

mean_bad <- colMeans(bad)
mean_good <- colMeans(good)
mean_bad; mean_good
## tanggungan pendapatan       Usia 
##   2.127660   7.129787  50.978723
## tanggungan pendapatan       Usia 
##   1.679245  11.388679  53.339623

Hitung Covariance Masing-masing Kelompok

S_bad <- cov(bad)
S_good <- cov(good)
S_bad; S_good
##            tanggungan pendapatan       Usia
## tanggungan  1.7224792   1.056984 -0.1059204
## pendapatan  1.0569843  10.073441 -1.6623959
## Usia       -0.1059204  -1.662396 50.5430157
##            tanggungan pendapatan       Usia
## tanggungan  1.5682148 -0.1633164  0.4187228
## pendapatan -0.1633164 16.0929463  2.1635341
## Usia        0.4187228  2.1635341 87.8824383

Hitung Covariance Pooled

n1 <- nrow(bad)
n2 <- nrow(good)
S_pooled <- ((n1-1)*S_bad + (n2-1)*S_good) / (n1+n2-2)
S_pooled
##            tanggungan pendapatan       Usia
## tanggungan  1.6406246  0.4094778  0.1724617
## pendapatan  0.4094778 13.2674643  0.3676894
## Usia        0.1724617  0.3676894 70.3557706

Invers Covariance

S_inv <- solve(S_pooled)
S_inv
##              tanggungan    pendapatan          Usia
## tanggungan  0.614394862 -0.0189232584 -0.0014071580
## pendapatan -0.018923258  0.0759661039 -0.0003506236
## Usia       -0.001407158 -0.0003506236  0.0142187569

Hitung Koefisien a (Fisher LDA)

a <- S_inv %*% (mean_bad - mean_good)
a
##                   [,1]
## tanggungan  0.35941771
## pendapatan -0.33118911
## Usia       -0.03270677

Hitung Nilai h

h <- 0.5 * t(a) %*% (mean_bad + mean_good)
h
##           [,1]
## [1,] -4.088381

Hitung Skor Fisher

X <- as.matrix(data[,1:3])
data$score <- X %*% a
head(data)
## # A tibble: 6 × 5
##   tanggungan pendapatan  Usia Status score[,1]
##        <dbl>      <dbl> <dbl> <fct>      <dbl>
## 1          6       10      45 Bad        -2.63
## 2          1        9      55 Bad        -4.42
## 3          2        4.8    41 Bad        -2.21
## 4          0        4.9    58 Bad        -3.52
## 5          1        4.7    40 Bad        -2.51
## 6          3        2.4    50 Bad        -1.35

Klasifikasi

h <- as.numeric(h)

data$pred_manual <- ifelse(data$score >= h, "Bad", "Good")

Confusion Matrix

table(Actual=data$Status, Pred=data$pred_manual)
##       Pred
## Actual Bad Good
##   Bad   38    9
##   Good  12   41

Error Rate

cm <- table(Actual=data$Status, Pred=data$pred_manual)
cm
##       Pred
## Actual Bad Good
##   Bad   38    9
##   Good  12   41
cm["Bad","Good"]
## [1] 9
# Error Bad = Bad predicted as Good
err_bad <- cm["Bad","Good"] / sum(data$Status=="Bad")

cm["Good","Bad"]
## [1] 12
# Error Good = Good predicted as Bad
err_good <- cm["Good","Bad"] / sum(data$Status=="Good")


err_bad; err_good
## [1] 0.1914894
## [1] 0.2264151

LDA Menggunakan Library MASS

library(MASS)
model <- lda(Status ~ tanggungan + pendapatan + Usia, data=data)
model
## Call:
## lda(Status ~ tanggungan + pendapatan + Usia, data = data)
## 
## Prior probabilities of groups:
##  Bad Good 
## 0.47 0.53 
## 
## Group means:
##      tanggungan pendapatan     Usia
## Bad    2.127660   7.129787 50.97872
## Good   1.679245  11.388679 53.33962
## 
## Coefficients of linear discriminants:
##                   LD1
## tanggungan -0.2799010
## pendapatan  0.2579176
## Usia        0.0254708

Prediksi Library MASS

pred <- predict(model)$class
table(Actual=data$Status, Pred=pred)
##       Pred
## Actual Bad Good
##   Bad   36   11
##   Good  11   42

Fungsi Diskriminan per Kelas (Bad & Good)

# Mean masing-masing kelas
mu_bad <- as.numeric(model$means["Bad", ])
mu_good <- as.numeric(model$means["Good", ])


# Koefisien per kelas: b_k = S_inv %*% mu_k
b_bad <- S_inv %*% mu_bad
b_good <- S_inv %*% mu_good


# Konstanta per kelas: c_k = -1/2 * mu' * S_inv * mu + log(prior)
prior <- model$prior


c_bad <- -0.5 * t(mu_bad) %*% S_inv %*% mu_bad + log(prior["Bad"])
c_good <- -0.5 * t(mu_good) %*% S_inv %*% mu_good + log(prior["Good"])
# Tampilkan fungsi Bad
a_bad <- c(c_bad, b_bad)
a_bad
## [1] -21.9854491   1.1005692   0.4834856   0.7193603
# Tampilkan fungsi Good
a_good <- c(c_good, b_good)
a_good
## [1] -25.9536858   0.7411515   0.8146747   0.7520670