library(readr)
library(psych)
## Warning: package 'psych' was built under R version 4.5.2
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.5.2
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.5.2
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
library(factoextra)
library(clusterCrit)
## Warning: package 'clusterCrit' was built under R version 4.5.2
library(ggplot2)
library(tidyr)
library(scales)
## Warning: package 'scales' was built under R version 4.5.2
## 
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
## The following object is masked from 'package:readr':
## 
##     col_factor
library(corrplot)
## corrplot 0.95 loaded
library(cluster)
library(MASS)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import Data

Data diambil dari hasil analisis gerombol pada tugas sebelumnya dengan sumber data World Happiness Index 2021 analisis gerombol dan sumber data

d = read_csv("D:\\Semester 5\\TPG\\TPG\\Analisis Diskriminan\\Data Analisis Diskriminan (raw).csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Negara, Kelas
## dbl (2): Logged GDP per capita, Social support
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(d)
## # A tibble: 6 × 4
##   Negara         Kelas  `Logged GDP per capita` `Social support`
##   <chr>          <chr>                    <dbl>            <dbl>
## 1 Iceland        Middle                   1.25             1.46 
## 2 Israel         Middle                   0.986            1.08 
## 3 Costa Rica     Middle                   0.386            0.664
## 4 Czech Republic Middle                   0.970            1.15 
## 5 United States  Middle                   1.37             0.916
## 6 Belgium        Middle                   1.20             0.794
# pisahkan data per kelompok
M = subset(d, Kelas == "Middle") [,1:4]
L = subset(d, Kelas == "Low") [,1:4]
M
## # A tibble: 50 × 4
##    Negara                   Kelas  `Logged GDP per capita` `Social support`
##    <chr>                    <chr>                    <dbl>            <dbl>
##  1 Iceland                  Middle                   1.25             1.46 
##  2 Israel                   Middle                   0.986            1.08 
##  3 Costa Rica               Middle                   0.386            0.664
##  4 Czech Republic           Middle                   0.970            1.15 
##  5 United States            Middle                   1.37             0.916
##  6 Belgium                  Middle                   1.20             0.794
##  7 France                   Middle                   1.10             1.11 
##  8 Bahrain                  Middle                   1.07             0.411
##  9 Malta                    Middle                   1.07             1.01 
## 10 Taiwan Province of China Middle                   1.24             0.725
## # ℹ 40 more rows
L
## # A tibble: 50 × 4
##    Negara      Kelas `Logged GDP per capita` `Social support`
##    <chr>       <chr>                   <dbl>            <dbl>
##  1 Guatemala   Low                   -0.327           -0.0152
##  2 Kosovo      Low                   -0.0986           0.0544
##  3 Uzbekistan  Low                   -0.515            0.899 
##  4 El Salvador Low                   -0.326           -0.459 
##  5 Thailand    Low                    0.322            0.638 
##  6 Nicaragua   Low                   -0.701            0.429 
##  7 Honduras    Low                   -0.677           -0.0239
##  8 Philippines Low                   -0.307            0.133 
##  9 Kyrgyzstan  Low                   -0.772            0.681 
## 10 Bolivia     Low                   -0.333           -0.0413
## # ℹ 40 more rows

Rataan Perkelompok

# Menghitung mean kelompok Middle dan Low
mean_M <- colMeans(M[, 3:4], na.rm = TRUE)
mean_L <- colMeans(L[, 3:4], na.rm = TRUE)

mean <- d %>%
  group_by(Kelas) %>%
  summarise(
    Mean_GDP = mean(`Logged GDP per capita`, na.rm = TRUE),
    Mean_Social = mean(`Social support`, na.rm = TRUE)
  )
mean
## # A tibble: 2 × 3
##   Kelas  Mean_GDP Mean_Social
##   <chr>     <dbl>       <dbl>
## 1 Low      -0.824      -0.595
## 2 Middle    0.706       0.656

Matriks Covariance

Tiap Kelompok

Melihat keragaman dari tiap kelompok

# Matriks Covariance Middle 
cov_M = cov(M[, 3:4])
cov_M
##                       Logged GDP per capita Social support
## Logged GDP per capita            0.17247969     0.03828451
## Social support                   0.03828451     0.15032744
# Matriks Covariance Low
cov_L = cov(L[, 3:4])
cov_L
##                       Logged GDP per capita Social support
## Logged GDP per capita             0.4592910      0.3565371
## Social support                    0.3565371      0.7793380

Gabungan

Menyatukan covariance kedua kelompok sehingga bisa direpresentasikan di dalam 1 matrix

# Hitung jumlah sampel (n) tiap kelompok 
n_M = nrow(M)
n_L = nrow(L)

# Hitung Matriks Covariance gabungan dari kelompok Middle dan Low
pembilang = ((n_M-1)*cov_M) + ((n_L-1)*cov_L)
penyebut = n_M + n_L -2 

# Hasil akhir
S_gab = pembilang/penyebut
S_gab
##                       Logged GDP per capita Social support
## Logged GDP per capita             0.3158853      0.1974108
## Social support                    0.1974108      0.4648327

Invers S_gab

S_gab_inv = solve(S_gab)
S_gab_inv
##                       Logged GDP per capita Social support
## Logged GDP per capita              4.309480      -1.830202
## Social support                    -1.830202       2.928584

Koefisien Model (a)

Menemukan garis pemisah antara kelas Middle dan Low

a = S_gab_inv %*% (mean_M - mean_L)
a
##                            [,1]
## Logged GDP per capita 4.3029991
## Social support        0.8633509

Nilai Batas Klasifikasi (h)

Sebagai ambang batas suatu objek tergolong ke dalam kelas Middle atau Low

h = 0.5 * t(a) %*% (mean_M + mean_L)
h
##            [,1]
## [1,] -0.2259766

Skor Fisher

Klasifikasikan data dengan membandingkan nilai batas (cutoff)

Jika Skor > h –> Masuk ke Kelompok Middle Jika Skor < h –> Masuk ke Kelompok Low

# Ubah data kolom 3 dan 4 jadi matriks 
X = as.matrix(d[3:4])

# Hitung Fisher skor 
fisher = X %*% a # kalikan matriks X dengan nilai a

# Gabungkan hasil
d_hasil = d
d_hasil$fisher = as.vector(fisher)

Nilai Prediksi

# klasifikasi Prediksi
d_hasil$Prediksi = ifelse(d_hasil$fisher > as.numeric(h), "Middle", "Low")
head(d_hasil)
## # A tibble: 6 × 6
##   Negara         Kelas  `Logged GDP per capita` `Social support` fisher Prediksi
##   <chr>          <chr>                    <dbl>            <dbl>  <dbl> <chr>   
## 1 Iceland        Middle                   1.25             1.46    6.63 Middle  
## 2 Israel         Middle                   0.986            1.08    5.18 Middle  
## 3 Costa Rica     Middle                   0.386            0.664   2.24 Middle  
## 4 Czech Republic Middle                   0.970            1.15    5.17 Middle  
## 5 United States  Middle                   1.37             0.916   6.70 Middle  
## 6 Belgium        Middle                   1.20             0.794   5.85 Middle

Confusion Matrix

# Buat tabel silang/tabel akurasi (Confusion Matrix) 
conf_matrix = table(Kelas_Asli = d_hasil$Kelas, Prediksi_Model = d_hasil$Prediksi)
conf_matrix
##           Prediksi_Model
## Kelas_Asli Low Middle
##     Low     43      7
##     Middle   2     48

Error Rate (APER)

# Error Rate kelompok Low (Aktual "Low" (Baris Low), tapi masuk ke kolom "Middle")
err_low = conf_matrix["Low", "Middle"] / sum(d_hasil$Kelas == "Low")

err_low
## [1] 0.14
# Error Rate kelompok Low (Aktual "Middle" (Baris Middle), tapi masuk ke kolom "Low")
err_middle = conf_matrix["Middle", "Low"] / sum(d_hasil$Kelas == "Middle")

err_middle
## [1] 0.04