Library
library(cluster)
library(factoextra)
library(ggplot2)
library(tidyr)
library(scales)
library(readxl)
library(RColorBrewer)
library(clusterCrit)
library(rattle)
library(MASS)
library(dplyr)
library(HDclassif)
Import Data
data <- read_excel("D:\\Github\\STA1342-TPG\\Data_prak11.xlsx")
data <- data %>% mutate(Status = as.factor(Status))
head(data)
## # A tibble: 6 × 4
## tanggungan pendapatan Usia Status
## <dbl> <dbl> <dbl> <fct>
## 1 6 10 45 Bad
## 2 1 9 55 Bad
## 3 2 4.8 41 Bad
## 4 0 4.9 58 Bad
## 5 1 4.7 40 Bad
## 6 3 2.4 50 Bad
Pisahkan Data per Kelompok
bad <- subset(data, Status=="Bad")[,1:3]
good <- subset(data, Status=="Good")[,1:3]
Hitung Mean per Kelompok
mean_bad <- colMeans(bad)
mean_good <- colMeans(good)
mean_bad; mean_good
## tanggungan pendapatan Usia
## 2.127660 7.129787 50.978723
## tanggungan pendapatan Usia
## 1.679245 11.388679 53.339623
Hitung Covariance Masing-masing Kelompok
S_bad <- cov(bad)
S_good <- cov(good)
S_bad; S_good
## tanggungan pendapatan Usia
## tanggungan 1.7224792 1.056984 -0.1059204
## pendapatan 1.0569843 10.073441 -1.6623959
## Usia -0.1059204 -1.662396 50.5430157
## tanggungan pendapatan Usia
## tanggungan 1.5682148 -0.1633164 0.4187228
## pendapatan -0.1633164 16.0929463 2.1635341
## Usia 0.4187228 2.1635341 87.8824383
Hitung Covariance Pooled
n1 <- nrow(bad)
n2 <- nrow(good)
S_pooled <- ((n1-1)*S_bad + (n2-1)*S_good) / (n1+n2-2)
S_pooled
## tanggungan pendapatan Usia
## tanggungan 1.6406246 0.4094778 0.1724617
## pendapatan 0.4094778 13.2674643 0.3676894
## Usia 0.1724617 0.3676894 70.3557706
Invers Covariance
S_inv <- solve(S_pooled)
S_inv
## tanggungan pendapatan Usia
## tanggungan 0.614394862 -0.0189232584 -0.0014071580
## pendapatan -0.018923258 0.0759661039 -0.0003506236
## Usia -0.001407158 -0.0003506236 0.0142187569
Hitung Koefisien a (Fisher LDA)
a <- S_inv %*% (mean_bad - mean_good)
a
## [,1]
## tanggungan 0.35941771
## pendapatan -0.33118911
## Usia -0.03270677
Hitung Nilai h
h <- 0.5 * t(a) %*% (mean_bad + mean_good)
h
## [,1]
## [1,] -4.088381
Hitung Skor Fisher
X <- as.matrix(data[,1:3])
data$score <- X %*% a
head(data)
## # A tibble: 6 × 5
## tanggungan pendapatan Usia Status score[,1]
## <dbl> <dbl> <dbl> <fct> <dbl>
## 1 6 10 45 Bad -2.63
## 2 1 9 55 Bad -4.42
## 3 2 4.8 41 Bad -2.21
## 4 0 4.9 58 Bad -3.52
## 5 1 4.7 40 Bad -2.51
## 6 3 2.4 50 Bad -1.35
Klasifikasi
h <- as.numeric(h)
data$pred_manual <- ifelse(data$score >= h, "Bad", "Good")
Confusion Matrix
table(Actual=data$Status, Pred=data$pred_manual)
## Pred
## Actual Bad Good
## Bad 38 9
## Good 12 41
Error Rate
cm <- table(Actual=data$Status, Pred=data$pred_manual)
cm
## Pred
## Actual Bad Good
## Bad 38 9
## Good 12 41
## [1] 9
# Error Bad = Bad predicted as Good
err_bad <- cm["Bad","Good"] / sum(data$Status=="Bad")
cm["Good","Bad"]
## [1] 12
# Error Good = Good predicted as Bad
err_good <- cm["Good","Bad"] / sum(data$Status=="Good")
err_bad; err_good
## [1] 0.1914894
## [1] 0.2264151
LDA Menggunakan Library MASS
library(MASS)
model <- lda(Status ~ tanggungan + pendapatan + Usia, data=data)
model
## Call:
## lda(Status ~ tanggungan + pendapatan + Usia, data = data)
##
## Prior probabilities of groups:
## Bad Good
## 0.47 0.53
##
## Group means:
## tanggungan pendapatan Usia
## Bad 2.127660 7.129787 50.97872
## Good 1.679245 11.388679 53.33962
##
## Coefficients of linear discriminants:
## LD1
## tanggungan -0.2799010
## pendapatan 0.2579176
## Usia 0.0254708
Prediksi Library MASS
pred <- predict(model)$class
table(Actual=data$Status, Pred=pred)
## Pred
## Actual Bad Good
## Bad 36 11
## Good 11 42
Fungsi Diskriminan per Kelas (Bad & Good)
# Mean masing-masing kelas
mu_bad <- as.numeric(model$means["Bad", ])
mu_good <- as.numeric(model$means["Good", ])
# Koefisien per kelas: b_k = S_inv %*% mu_k
b_bad <- S_inv %*% mu_bad
b_good <- S_inv %*% mu_good
# Konstanta per kelas: c_k = -1/2 * mu' * S_inv * mu + log(prior)
prior <- model$prior
c_bad <- -0.5 * t(mu_bad) %*% S_inv %*% mu_bad + log(prior["Bad"])
c_good <- -0.5 * t(mu_good) %*% S_inv %*% mu_good + log(prior["Good"])
# Tampilkan fungsi Bad
a_bad <- c(c_bad, b_bad)
a_bad
## [1] -21.9854491 1.1005692 0.4834856 0.7193603
# Tampilkan fungsi Good
a_good <- c(c_good, b_good)
a_good
## [1] -25.9536858 0.7411515 0.8146747 0.7520670