library(readr)
library(psych)
## Warning: package 'psych' was built under R version 4.5.2
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.5.2
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.5.2
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
library(factoextra)
library(clusterCrit)
## Warning: package 'clusterCrit' was built under R version 4.5.2
library(ggplot2)
library(tidyr)
library(scales)
## Warning: package 'scales' was built under R version 4.5.2
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
## The following object is masked from 'package:readr':
##
## col_factor
library(corrplot)
## corrplot 0.95 loaded
library(cluster)
library(MASS)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Data diambil dari hasil analisis gerombol pada tugas sebelumnya dengan sumber data World Happiness Index 2021 analisis gerombol dan sumber data
d = read_csv("D:\\Semester 5\\TPG\\TPG\\Analisis Diskriminan\\Data Analisis Diskriminan (raw).csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Negara, Kelas
## dbl (2): Logged GDP per capita, Social support
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(d)
## # A tibble: 6 × 4
## Negara Kelas `Logged GDP per capita` `Social support`
## <chr> <chr> <dbl> <dbl>
## 1 Iceland Middle 1.25 1.46
## 2 Israel Middle 0.986 1.08
## 3 Costa Rica Middle 0.386 0.664
## 4 Czech Republic Middle 0.970 1.15
## 5 United States Middle 1.37 0.916
## 6 Belgium Middle 1.20 0.794
# pisahkan data per kelompok
M = subset(d, Kelas == "Middle") [,1:4]
L = subset(d, Kelas == "Low") [,1:4]
M
## # A tibble: 50 × 4
## Negara Kelas `Logged GDP per capita` `Social support`
## <chr> <chr> <dbl> <dbl>
## 1 Iceland Middle 1.25 1.46
## 2 Israel Middle 0.986 1.08
## 3 Costa Rica Middle 0.386 0.664
## 4 Czech Republic Middle 0.970 1.15
## 5 United States Middle 1.37 0.916
## 6 Belgium Middle 1.20 0.794
## 7 France Middle 1.10 1.11
## 8 Bahrain Middle 1.07 0.411
## 9 Malta Middle 1.07 1.01
## 10 Taiwan Province of China Middle 1.24 0.725
## # ℹ 40 more rows
L
## # A tibble: 50 × 4
## Negara Kelas `Logged GDP per capita` `Social support`
## <chr> <chr> <dbl> <dbl>
## 1 Guatemala Low -0.327 -0.0152
## 2 Kosovo Low -0.0986 0.0544
## 3 Uzbekistan Low -0.515 0.899
## 4 El Salvador Low -0.326 -0.459
## 5 Thailand Low 0.322 0.638
## 6 Nicaragua Low -0.701 0.429
## 7 Honduras Low -0.677 -0.0239
## 8 Philippines Low -0.307 0.133
## 9 Kyrgyzstan Low -0.772 0.681
## 10 Bolivia Low -0.333 -0.0413
## # ℹ 40 more rows
# Menghitung mean kelompok Middle dan Low
mean_M <- colMeans(M[, 3:4], na.rm = TRUE)
mean_L <- colMeans(L[, 3:4], na.rm = TRUE)
mean <- d %>%
group_by(Kelas) %>%
summarise(
Mean_GDP = mean(`Logged GDP per capita`, na.rm = TRUE),
Mean_Social = mean(`Social support`, na.rm = TRUE)
)
mean
## # A tibble: 2 × 3
## Kelas Mean_GDP Mean_Social
## <chr> <dbl> <dbl>
## 1 Low -0.824 -0.595
## 2 Middle 0.706 0.656
Melihat keragaman dari tiap kelompok
# Matriks Covariance Middle
cov_M = cov(M[, 3:4])
cov_M
## Logged GDP per capita Social support
## Logged GDP per capita 0.17247969 0.03828451
## Social support 0.03828451 0.15032744
# Matriks Covariance Low
cov_L = cov(L[, 3:4])
cov_L
## Logged GDP per capita Social support
## Logged GDP per capita 0.4592910 0.3565371
## Social support 0.3565371 0.7793380
Menyatukan covariance kedua kelompok sehingga bisa direpresentasikan di dalam 1 matrix
# Hitung jumlah sampel (n) tiap kelompok
n_M = nrow(M)
n_L = nrow(L)
# Hitung Matriks Covariance gabungan dari kelompok Middle dan Low
pembilang = ((n_M-1)*cov_M) + ((n_L-1)*cov_L)
penyebut = n_M + n_L -2
# Hasil akhir
S_gab = pembilang/penyebut
S_gab
## Logged GDP per capita Social support
## Logged GDP per capita 0.3158853 0.1974108
## Social support 0.1974108 0.4648327
S_gab_inv = solve(S_gab)
S_gab_inv
## Logged GDP per capita Social support
## Logged GDP per capita 4.309480 -1.830202
## Social support -1.830202 2.928584
Menemukan garis pemisah antara kelas Middle dan Low
a = S_gab_inv %*% (mean_M - mean_L)
a
## [,1]
## Logged GDP per capita 4.3029991
## Social support 0.8633509
Sebagai ambang batas suatu objek tergolong ke dalam kelas Middle atau Low
h = 0.5 * t(a) %*% (mean_M + mean_L)
h
## [,1]
## [1,] -0.2259766
Klasifikasikan data dengan membandingkan nilai batas (cutoff)
Jika Skor > h –> Masuk ke Kelompok Middle Jika Skor < h –> Masuk ke Kelompok Low
# Ubah data kolom 3 dan 4 jadi matriks
X = as.matrix(d[3:4])
# Hitung Fisher skor
fisher = X %*% a # kalikan matriks X dengan nilai a
# Gabungkan hasil
d_hasil = d
d_hasil$fisher = as.vector(fisher)
# klasifikasi Prediksi
d_hasil$Prediksi = ifelse(d_hasil$fisher > as.numeric(h), "Middle", "Low")
head(d_hasil)
## # A tibble: 6 × 6
## Negara Kelas `Logged GDP per capita` `Social support` fisher Prediksi
## <chr> <chr> <dbl> <dbl> <dbl> <chr>
## 1 Iceland Middle 1.25 1.46 6.63 Middle
## 2 Israel Middle 0.986 1.08 5.18 Middle
## 3 Costa Rica Middle 0.386 0.664 2.24 Middle
## 4 Czech Republic Middle 0.970 1.15 5.17 Middle
## 5 United States Middle 1.37 0.916 6.70 Middle
## 6 Belgium Middle 1.20 0.794 5.85 Middle
# Buat tabel silang/tabel akurasi (Confusion Matrix)
conf_matrix = table(Kelas_Asli = d_hasil$Kelas, Prediksi_Model = d_hasil$Prediksi)
conf_matrix
## Prediksi_Model
## Kelas_Asli Low Middle
## Low 43 7
## Middle 2 48
# Error Rate kelompok Low (Aktual "Low" (Baris Low), tapi masuk ke kolom "Middle")
err_low = conf_matrix["Low", "Middle"] / sum(d_hasil$Kelas == "Low")
err_low
## [1] 0.14
# Error Rate kelompok Low (Aktual "Middle" (Baris Middle), tapi masuk ke kolom "Low")
err_middle = conf_matrix["Middle", "Low"] / sum(d_hasil$Kelas == "Middle")
err_middle
## [1] 0.04