1. Baca Data

data <- read_excel("D:/User/Unduhan/Data Diskriminan contoh.xlsx")
data$Status <- as.factor(data$Status)
head(data)
## # A tibble: 6 × 4
##   tanggungan pendapatan  Usia Status
##        <dbl>      <dbl> <dbl> <fct> 
## 1          6       10      39 Bad   
## 2          1        9      49 Bad   
## 3          2        4.8    35 Bad   
## 4          0        4.9    52 Bad   
## 5          1        4.7    34 Bad   
## 6          3        2.4    44 Bad

2. Pisahkan Data per Kelas

bad <- subset(data, Status=="Bad")[,1:3]
good <- subset(data, Status=="Good")[,1:3]

3. Hitung Mean per Kelompok

mean_bad <- colMeans(bad)
mean_good <- colMeans(good)
mean_bad
## tanggungan pendapatan       Usia 
##   2.127660   7.129787  44.978723
mean_good
## tanggungan pendapatan       Usia 
##   1.679245  11.388679  47.339623

4. Hitung Covariance Masing-masing Kelompok

S_bad <- cov(bad)
S_good <- cov(good)
S_bad
##            tanggungan pendapatan       Usia
## tanggungan  1.7224792   1.056984 -0.1059204
## pendapatan  1.0569843  10.073441 -1.6623959
## Usia       -0.1059204  -1.662396 50.5430157
S_good
##            tanggungan pendapatan       Usia
## tanggungan  1.5682148 -0.1633164  0.4187228
## pendapatan -0.1633164 16.0929463  2.1635341
## Usia        0.4187228  2.1635341 87.8824383

5. Hitung Covariance Pooled

n1 <- nrow(bad)
n2 <- nrow(good)
S_pooled <- ((n1-1)*S_bad + (n2-1)*S_good) / (n1+n2-2)
S_pooled
##            tanggungan pendapatan       Usia
## tanggungan  1.6406246  0.4094778  0.1724617
## pendapatan  0.4094778 13.2674643  0.3676894
## Usia        0.1724617  0.3676894 70.3557706

6. Invers Covariance

S_inv <- solve(S_pooled)
S_inv
##              tanggungan    pendapatan          Usia
## tanggungan  0.614394862 -0.0189232584 -0.0014071580
## pendapatan -0.018923258  0.0759661039 -0.0003506236
## Usia       -0.001407158 -0.0003506236  0.0142187569

7. Hitung Koefisien a

a <- S_inv %*% (mean_bad - mean_good)
a
##                   [,1]
## tanggungan  0.35941771
## pendapatan -0.33118911
## Usia       -0.03270677

8. Hitung h

h <- 0.5 * t(a) %*% (mean_bad + mean_good)
h <- as.numeric(h)
h
## [1] -3.89214

9. Hitung Skor Fisher

X <- as.matrix(data[,1:3])
data$score <- X %*% a
head(data)
## # A tibble: 6 × 5
##   tanggungan pendapatan  Usia Status score[,1]
##        <dbl>      <dbl> <dbl> <fct>      <dbl>
## 1          6       10      39 Bad        -2.43
## 2          1        9      49 Bad        -4.22
## 3          2        4.8    35 Bad        -2.02
## 4          0        4.9    52 Bad        -3.32
## 5          1        4.7    34 Bad        -2.31
## 6          3        2.4    44 Bad        -1.16

10. Klasifikasi Manual

data$pred_manual <- ifelse(data$score >= h, "Bad", "Good")
data
## # A tibble: 100 × 6
##    tanggungan pendapatan  Usia Status score[,1] pred_manual[,1]
##         <dbl>      <dbl> <dbl> <fct>      <dbl> <chr>          
##  1          6       10      39 Bad        -2.43 Bad            
##  2          1        9      49 Bad        -4.22 Good           
##  3          2        4.8    35 Bad        -2.02 Bad            
##  4          0        4.9    52 Bad        -3.32 Bad            
##  5          1        4.7    34 Bad        -2.31 Bad            
##  6          3        2.4    44 Bad        -1.16 Bad            
##  7          3        4.6    37 Bad        -1.66 Bad            
##  8          2        4.9    56 Bad        -2.74 Bad            
##  9          4        8      35 Bad        -2.36 Bad            
## 10          1        8.3    49 Bad        -3.99 Good           
## # ℹ 90 more rows

11. Confusion Matrix dan Error Rate

cm <- table(Actual=data$Status, Pred=data$pred_manual)
cm
##       Pred
## Actual Bad Good
##   Bad   38    9
##   Good  12   41
err_bad <- cm["Bad","Good"] / sum(data$Status=="Bad")
err_good <- cm["Good","Bad"] / sum(data$Status=="Good")
err_bad
## [1] 0.1914894
err_good
## [1] 0.2264151

12. LDA Menggunakan MASS

model <- lda(Status ~ tanggungan + pendapatan + Usia, data=data)
model
## Call:
## lda(Status ~ tanggungan + pendapatan + Usia, data = data)
## 
## Prior probabilities of groups:
##  Bad Good 
## 0.47 0.53 
## 
## Group means:
##      tanggungan pendapatan     Usia
## Bad    2.127660   7.129787 44.97872
## Good   1.679245  11.388679 47.33962
## 
## Coefficients of linear discriminants:
##                   LD1
## tanggungan -0.2799010
## pendapatan  0.2579176
## Usia        0.0254708