1. Baca Data
data <- read_excel("D:/User/Unduhan/Data Diskriminan contoh.xlsx")
data$Status <- as.factor(data$Status)
head(data)
## # A tibble: 6 × 4
## tanggungan pendapatan Usia Status
## <dbl> <dbl> <dbl> <fct>
## 1 6 10 39 Bad
## 2 1 9 49 Bad
## 3 2 4.8 35 Bad
## 4 0 4.9 52 Bad
## 5 1 4.7 34 Bad
## 6 3 2.4 44 Bad
2. Pisahkan Data per Kelas
bad <- subset(data, Status=="Bad")[,1:3]
good <- subset(data, Status=="Good")[,1:3]
3. Hitung Mean per Kelompok
mean_bad <- colMeans(bad)
mean_good <- colMeans(good)
mean_bad
## tanggungan pendapatan Usia
## 2.127660 7.129787 44.978723
mean_good
## tanggungan pendapatan Usia
## 1.679245 11.388679 47.339623
4. Hitung Covariance Masing-masing Kelompok
S_bad <- cov(bad)
S_good <- cov(good)
S_bad
## tanggungan pendapatan Usia
## tanggungan 1.7224792 1.056984 -0.1059204
## pendapatan 1.0569843 10.073441 -1.6623959
## Usia -0.1059204 -1.662396 50.5430157
S_good
## tanggungan pendapatan Usia
## tanggungan 1.5682148 -0.1633164 0.4187228
## pendapatan -0.1633164 16.0929463 2.1635341
## Usia 0.4187228 2.1635341 87.8824383
5. Hitung Covariance Pooled
n1 <- nrow(bad)
n2 <- nrow(good)
S_pooled <- ((n1-1)*S_bad + (n2-1)*S_good) / (n1+n2-2)
S_pooled
## tanggungan pendapatan Usia
## tanggungan 1.6406246 0.4094778 0.1724617
## pendapatan 0.4094778 13.2674643 0.3676894
## Usia 0.1724617 0.3676894 70.3557706
6. Invers Covariance
S_inv <- solve(S_pooled)
S_inv
## tanggungan pendapatan Usia
## tanggungan 0.614394862 -0.0189232584 -0.0014071580
## pendapatan -0.018923258 0.0759661039 -0.0003506236
## Usia -0.001407158 -0.0003506236 0.0142187569
7. Hitung Koefisien a
a <- S_inv %*% (mean_bad - mean_good)
a
## [,1]
## tanggungan 0.35941771
## pendapatan -0.33118911
## Usia -0.03270677
8. Hitung h
h <- 0.5 * t(a) %*% (mean_bad + mean_good)
h <- as.numeric(h)
h
## [1] -3.89214
9. Hitung Skor Fisher
X <- as.matrix(data[,1:3])
data$score <- X %*% a
head(data)
## # A tibble: 6 × 5
## tanggungan pendapatan Usia Status score[,1]
## <dbl> <dbl> <dbl> <fct> <dbl>
## 1 6 10 39 Bad -2.43
## 2 1 9 49 Bad -4.22
## 3 2 4.8 35 Bad -2.02
## 4 0 4.9 52 Bad -3.32
## 5 1 4.7 34 Bad -2.31
## 6 3 2.4 44 Bad -1.16
10. Klasifikasi Manual
data$pred_manual <- ifelse(data$score >= h, "Bad", "Good")
data
## # A tibble: 100 × 6
## tanggungan pendapatan Usia Status score[,1] pred_manual[,1]
## <dbl> <dbl> <dbl> <fct> <dbl> <chr>
## 1 6 10 39 Bad -2.43 Bad
## 2 1 9 49 Bad -4.22 Good
## 3 2 4.8 35 Bad -2.02 Bad
## 4 0 4.9 52 Bad -3.32 Bad
## 5 1 4.7 34 Bad -2.31 Bad
## 6 3 2.4 44 Bad -1.16 Bad
## 7 3 4.6 37 Bad -1.66 Bad
## 8 2 4.9 56 Bad -2.74 Bad
## 9 4 8 35 Bad -2.36 Bad
## 10 1 8.3 49 Bad -3.99 Good
## # ℹ 90 more rows
11. Confusion Matrix dan Error Rate
cm <- table(Actual=data$Status, Pred=data$pred_manual)
cm
## Pred
## Actual Bad Good
## Bad 38 9
## Good 12 41
err_bad <- cm["Bad","Good"] / sum(data$Status=="Bad")
err_good <- cm["Good","Bad"] / sum(data$Status=="Good")
err_bad
## [1] 0.1914894
err_good
## [1] 0.2264151
12. LDA Menggunakan MASS
model <- lda(Status ~ tanggungan + pendapatan + Usia, data=data)
model
## Call:
## lda(Status ~ tanggungan + pendapatan + Usia, data = data)
##
## Prior probabilities of groups:
## Bad Good
## 0.47 0.53
##
## Group means:
## tanggungan pendapatan Usia
## Bad 2.127660 7.129787 44.97872
## Good 1.679245 11.388679 47.33962
##
## Coefficients of linear discriminants:
## LD1
## tanggungan -0.2799010
## pendapatan 0.2579176
## Usia 0.0254708