Analisis Metode 1: Discriminant Analysis
Statistika Multivariat - Project
Referensi: PENGARUH FINANCIAL INDICATOR DAN NON-FINANCIAL INDICATOR TERHADAP FINANCIAL DISTRESS DENGAN DISCRIMINANT ANALYSIS DAN LOGISTIC REGRESSION (Studi Pada Perusahaan Sektor Energi yang Terdaftar Di Bursa Efek Indonesia Tahun 2018-2021)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: ggplot2
## Loading required package: lattice
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## corrplot 0.95 loaded
## ---
## biotools version 4.3
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
1 Dataset
fin <- readxl::read_excel("C:/Users/DIVNA WIDYASTUTI/Documents/KULIAH/SEM 5/STATMUL 2/projek/datafinansial.xlsx")
glimpse(fin)## Rows: 200
## Columns: 10
## $ Perusahaan <chr> "ADRO", "ADRO", "ADRO", "ADRO", "AKRA", "AKRA", "AKRA", "AK…
## $ Tahun <dbl> 2018, 2019, 2020, 2021, 2018, 2019, 2020, 2021, 2018, 2019,…
## $ ROA <dbl> 0.0676, 0.0603, 0.0248, 0.1356, 0.0333, 0.0327, 0.0515, 0.0…
## $ DER <dbl> 0.6410, 0.8118, 0.6149, 0.7017, 1.0088, 1.1267, 0.7699, 1.0…
## $ OCFR <dbl> 0.3285, 0.2837, 0.3040, 0.4591, -0.0448, 0.0607, 0.1313, 0.…
## $ AGE <dbl> 14, 15, 16, 17, 41, 42, 43, 44, 34, 35, 36, 37, 37, 37, 37,…
## $ SIZE <dbl> 32.2592, 32.2386, 32.1295, 32.3167, 30.6238, 30.6948, 30.55…
## $ INST_OWN <dbl> 43.91120, 43.91120, 43.91120, 43.91120, 59.59670, 59.59670,…
## $ MNJ_OWN <dbl> 12.4001, 12.4034, 12.3956, 12.3865, 0.6755, 0.6755, 0.6619,…
## $ Y <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,…
## Perusahaan Tahun ROA DER
## Length:200 Min. :2018 Min. :-1.53830 Min. :-43.0864
## Class :character 1st Qu.:2019 1st Qu.:-0.01142 1st Qu.: 0.4656
## Mode :character Median :2020 Median : 0.02640 Median : 0.9827
## Mean :2020 Mean : 0.01774 Mean : 1.3858
## 3rd Qu.:2020 3rd Qu.: 0.06985 3rd Qu.: 1.8298
## Max. :2021 Max. : 0.55260 Max. : 34.0556
##
## OCFR AGE SIZE INST_OWN
## Min. :-0.34300 Min. :14.00 Min. :24.77 Min. :10.11
## 1st Qu.: 0.05872 1st Qu.:37.00 1st Qu.:27.84 1st Qu.:43.91
## Median : 0.16095 Median :37.00 Median :28.84 Median :68.44
## Mean : 0.30064 Mean :36.65 Mean :28.93 Mean :63.31
## 3rd Qu.: 0.35018 3rd Qu.:37.00 3rd Qu.:30.08 3rd Qu.:81.82
## Max. : 2.77350 Max. :44.00 Max. :32.32 Max. :97.50
## NA's :1
## MNJ_OWN Y
## Min. : 0.0000 Min. :0.000
## 1st Qu.: 0.0000 1st Qu.:0.000
## Median : 0.0111 Median :1.000
## Mean : 3.7853 Mean :1.105
## 3rd Qu.: 0.6755 3rd Qu.:2.000
## Max. :61.4791 Max. :2.000
##
2 Preprocessing
2.1 Identifikasi Missing Value
## Perusahaan Tahun ROA DER OCFR AGE SIZE
## 0 0 0 0 0 0 0
## INST_OWN MNJ_OWN Y
## 1 0 0
2.2 Penanganan Missing Value
# Imputasi median
if(any(is.na(fin$INST_OWN))){
fin$INST_OWN[is.na(fin$INST_OWN)] <- median(fin$INST_OWN, na.rm = TRUE)
}
# cek ulang
colSums(is.na(fin))## Perusahaan Tahun ROA DER OCFR AGE SIZE
## 0 0 0 0 0 0 0
## INST_OWN MNJ_OWN Y
## 0 0 0
2.3 Transformasi Variabel Respons dan Seleksi Fitur Prediktor
2.4 Visualisasi Korelasi antar Variabel
numeric_vars <- fin[, vars_predictor]
corr_matrix <- cor(numeric_vars, use = "complete.obs")
corrplot(corr_matrix, method = "color", type = "upper", tl.cex = 0.8, addCoef.col = "black")2.5 Deteksi Outlier
z <- scale(fin[, vars_predictor])
outlier_rows <- unique(which(abs(z) > 3, arr.ind = TRUE)[,1])
outlier_rows## [1] 18 19 126 134 13 43 65 66 48 116 120 129 1 2 3 4 46 47 77
## [20] 78 79 80 133 135
3 Discriminant Analysis Assumption Testing
3.1 Normalitas (Kolmogorov-Smirnov)
ks_by_group <- by(fin_final[, vars_predictor], fin_final$Y, function(sub){
sapply(sub, function(x){
x2 <- na.omit(x)
if(length(unique(x2)) >= 3){
round(ks.test(x2, "pnorm", mean=mean(x2), sd=sd(x2))$p.value,4)
} else NA
})
})## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## Warning in ks.test.default(x2, "pnorm", mean = mean(x2), sd = sd(x2)): ties
## should not be present for the one-sample Kolmogorov-Smirnov test
## fin_final$Y: Distress
## ROA DER OCFR AGE SIZE INST_OWN MNJ_OWN
## 0.0061 0.0033 0.0040 0.0000 0.8709 0.0456 0.0000
## ------------------------------------------------------------
## fin_final$Y: Grey
## ROA DER OCFR AGE SIZE INST_OWN MNJ_OWN
## 0.9406 0.0141 0.1837 NA 0.8283 0.0409 0.0000
## ------------------------------------------------------------
## fin_final$Y: NonDistress
## ROA DER OCFR AGE SIZE INST_OWN MNJ_OWN
## 0.0923 0.0174 0.1498 0.0000 0.7562 0.4207 0.0000
3.2 Homogenitas matriks kovarians (Box’s M)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: fin_final[, vars_predictor]
## Chi-Sq (approx.) = Inf, df = 56, p-value < 2.2e-16
3.3 Multikolinearitas (VIF)
## DER OCFR AGE SIZE INST_OWN MNJ_OWN
## 1.176032 1.111543 1.028787 1.095973 1.159639 1.172900
3.4 Outlier multivariat (Mahalanobis)
center <- colMeans(fin_final[, vars_predictor])
S <- cov(fin_final[, vars_predictor])
m_dist <- mahalanobis(fin_final[, vars_predictor], center, S)
p <- length(vars_predictor)
cutoff <- qchisq(0.975, df = p)
sum(m_dist > cutoff)## [1] 16
3.5 MANOVA (perbedaan rata-rata multivariat antar kelas)
manova_model <- manova(as.matrix(fin_final[, vars_predictor]) ~ Y, data = fin_final)
summary(manova_model, test = "Wilks")## Df Wilks approx F num Df den Df Pr(>F)
## Y 2 0.523 9.1318 14 334 < 2.2e-16 ***
## Residuals 173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
4 Discriminant Analysis
summarize.class <- function(original, classify) {
class.table <- table(original, classify)
numb <- rowSums(class.table)
prop <- round(class.table / numb, 4)
overall <- round(sum(diag(class.table)) / sum(class.table), 4)
list(class.table = class.table, prop = prop, overall.correct = overall)
}4.1 Linear Discriminant Analysis (LDA)
#Model LDA
lda_model <- lda(Y ~ ., data = fin_final)
pred_lda <- predict(lda_model, newdata = fin_final)
res_lda <- summarize.class(fin_final$Y, pred_lda$class)
res_lda$class.table## classify
## original Distress Grey NonDistress
## Distress 47 5 8
## Grey 16 6 16
## NonDistress 7 5 66
## [1] 0.6761
#Struktur dan Fungsi Diskriminan LDA
ld_scores <- pred_lda$x
structure_mat <- cor(fin_final[, vars_predictor], ld_scores)
structure_mat## LD1 LD2
## ROA 0.7251231 -0.0592660
## DER -0.3483893 -0.1434445
## OCFR 0.6892072 0.5481679
## AGE 0.2927508 0.1036356
## SIZE 0.3129089 -0.3851916
## INST_OWN 0.5376004 -0.2717051
## MNJ_OWN -0.3459319 0.4978753
# Visualisasi LDA
ld_vals <- as.data.frame(ld_scores)
ld_vals$Y <- fin_final$Y
if("LD2" %in% names(ld_vals)){
ggplot(ld_vals, aes(x=LD1, y=LD2, color=Y)) +
geom_point(alpha=0.8, size=2) +
stat_ellipse(aes(fill=Y), geom="polygon", alpha=0.15, show.legend=FALSE) +
labs(title="Proyeksi LDA", x="LD1", y="LD2") + theme_minimal()
} else {
ggplot(ld_vals, aes(x=LD1, fill=Y)) +
geom_density(alpha=0.6) +
labs(title="Distribusi LD1 per kelas") + theme_minimal()
}4.2 Quadratic Discriminant Analysis (QDA)
#Reduksi Dimensi Menggunakan PCA
pca <- prcomp(fin_final[, vars_predictor], scale.=TRUE)
fin_pca <- data.frame(Y=fin_final$Y, pca$x[,1:3])
#Model QDA
qda_model <- qda(Y ~ ., data=fin_pca)
pred_qda <- predict(qda_model, newdata=fin_pca)
res_qda <- summarize.class(fin_pca$Y, pred_qda$class)
res_qda$class.table## classify
## original Distress Grey NonDistress
## Distress 32 18 10
## Grey 2 27 9
## NonDistress 2 28 48
## [1] 0.608
# Visualisasi QDA
qdavals <- data.frame(PC1=fin_pca$PC1, PC2=fin_pca$PC2, Y=fin_pca$Y)
ggplot(qdavals, aes(x=PC1, y=PC2, color=Y)) +
geom_point(alpha=0.8, size=2) +
stat_ellipse(aes(fill=Y), geom="polygon", alpha=0.15, show.legend=FALSE) +
labs(title="Proyeksi QDA berbasis PCA", x="PC1", y="PC2") + theme_minimal()