##### TUGAS MASUK ANMUL FISHER #####
# Memuat paket yang diperlukan
library(DT)
library(MVN)     
library(MASS)    
library(biotools) 
## ---
## biotools version 4.2
library(stats) 
library(lmtest) 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(mvnormtest)

# Fungsi untuk menguji normalitas multivariat
mnorm.test <- function(x) { 
  rata2 <- apply(x, 2, mean) 
  mcov <- var(x) 
  ds <- sort(mahalanobis(x, center = rata2, cov = mcov)) 
  n <- length(ds) 
  p <- (1:n - 0.5) / n 
  chi <- qchisq(p, df = ncol(x)) 
  win.graph() 
  plot(ds, chi, type = "p", main = "Mahalanobis Distance vs Chi-Squared Quantiles")
  return(ks.test(ds, chi, df = ncol(x))) 
} 

# Membaca data
TKM <- read.table(file.choose(), header = TRUE) 

# Deteksi multikolinearitas
library(car)
## Loading required package: carData
fit <- lm(TKM$MINUM ~ ., data = TKM) 
vif(fit)
##     USIA    BERAT   TINGGI   INCOME JAMKERJA OLAHRAGA 
## 3.475357 1.029991 1.097213 4.977696 1.059160 2.080941
# Uji normalitas multivariat
X <- as.matrix(TKM[, 2:7]) # Mengambil variabel prediktor
mnorm.test(X) 
## Warning in ks.test.default(ds, chi, df = ncol(x)): Parameter(s) df ignored
## 
##  Exact two-sample Kolmogorov-Smirnov test
## 
## data:  ds and chi
## D = 0.10667, p-value = 0.7907
## alternative hypothesis: two-sided
mshapiro.test(t(X[1:75,1:6]))
## 
##  Shapiro-Wilk normality test
## 
## data:  Z
## W = 0.72501, p-value = 1.503e-10
# Visualisasi normalitas multivariat
mvn(data = TKM[, 2:7], multivariatePlot = 'qq') 
## $multivariateNormality
##            Test       HZ      p value MVN
## 1 Henze-Zirkler 1.373937 6.461498e-14  NO
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling   USIA       0.5054  0.1965      YES   
## 2 Anderson-Darling   BERAT      1.4704   8e-04      NO    
## 3 Anderson-Darling  TINGGI      2.4090  <0.001      NO    
## 4 Anderson-Darling  INCOME      0.9026  0.0203      NO    
## 5 Anderson-Darling JAMKERJA     0.7526   0.048      NO    
## 6 Anderson-Darling OLAHRAGA     1.2269  0.0032      NO    
## 
## $Descriptives
##           n       Mean     Std.Dev Median Min    Max   25th   75th        Skew
## USIA     75  30.426667   6.2734389   30.0  20  45.00  25.50  35.00  0.18600800
## BERAT    75  54.546667  10.5654886   51.0  35  80.00  47.00  62.00  0.49650798
## TINGGI   75 159.986667   6.7433517  159.0 148 179.00 156.00 162.00  1.11512654
## INCOME   75 619.733333 124.5312653  600.0 400 900.00 530.00 700.00 -0.05999561
## JAMKERJA 75   5.268533   0.1345432    5.3   5   5.74   5.17   5.33  0.43951950
## OLAHRAGA 75   3.053333   0.6312436    3.0   2   4.50   2.50   3.50 -0.01353020
##            Kurtosis
## USIA     -0.7067899
## BERAT    -0.7432683
## TINGGI    1.1561273
## INCOME   -0.7261161
## JAMKERJA  0.6750696
## OLAHRAGA -0.8404944
# Uji Box's M untuk homoskedastisitas
boxM(data = TKM[, 2:7], grouping = TKM[, 1]) 
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  TKM[, 2:7]
## Chi-Sq (approx.) = 27.265, df = 21, p-value = 0.1623
# Analisis MANOVA
m <- manova(formula = cbind(TKM$UMUR, TKM$USIA, TKM$BERAT, TKM$TINGGI) ~ TKM$MINUM) 
summary(object = m, test = 'Wilks') 
##           Df   Wilks approx F num Df den Df   Pr(>F)   
## TKM$MINUM  1 0.80914   5.5825      3     71 0.001701 **
## Residuals 73                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Analisis Diskriminan Linier (Fisher)
linearDA <- lda(formula = TKM$MINUM ~ ., data = TKM) 
linearDA
## Call:
## lda(TKM$MINUM ~ ., data = TKM)
## 
## Prior probabilities of groups:
##         0         1 
## 0.5066667 0.4933333 
## 
## Group means:
##       USIA    BERAT   TINGGI   INCOME JAMKERJA OLAHRAGA
## 0 31.86842 57.21053 158.4474 659.4737 5.268421 3.197368
## 1 28.94595 51.81081 161.5676 578.9189 5.268649 2.905405
## 
## Coefficients of linear discriminants:
##                   LD1
## USIA      0.047566511
## BERAT    -0.063338513
## TINGGI    0.094872291
## INCOME   -0.008432149
## JAMKERJA -0.375351993
## OLAHRAGA  0.204124889
# Prediksi dan Tabel Kontingensi
predicted <- predict(object = linearDA, newdata = TKM) 
confusion_matrix <- table(actual = TKM$MINUM, predicted = predicted$class)

# Menampilkan tabel kontingensi
print(confusion_matrix)
##       predicted
## actual  0  1
##      0 27 11
##      1 11 26
# Menghitung akurasi
APER <- aer(TKM$MINUM, predicted$class)
APER
## [1] 0.2933333
akurasi <-1-APER
cat("Akurasi model: ", akurasi * 100, "%\n")
## Akurasi model:  70.66667 %