# Kelompok 1 - Analisis Multivariat, Topik 5 (MANCOVA), Sains Data 2023E
# Dosen Pembimbing: Ike Fitriyaningsih, M.Si
# Load library
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(MVN)
## Warning: package 'MVN' was built under R version 4.4.3
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.4.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## ---
## biotools version 4.3
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.4.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
# Read the log-transformed dataset; character columns are imported as factors
df <- read.csv(
  file = "C:/Users/shoba/OneDrive/Dokumen/Annas/TUGAS KULIAH/SEMESTER 4/ANALISIS MULTIVARIAT/UAS/hasil_transformasi_log.csv",
  stringsAsFactors = TRUE
)
# Make sure both grouping variables are stored as factors
df[["Attrition"]] <- as.factor(df[["Attrition"]])
df[["JobRole"]] <- as.factor(df[["JobRole"]])
# Keep only the columns used in the analysis:
# two grouping factors, the Age_log covariate and three log-scale responses
data_manova <- df[, c(
  "Attrition", "JobRole", "Age_log",
  "YearsWithCurrManager_log", "TotalWorkingYears_log", "YearsAtCompany_log"
)]
# Pearson correlation between the three dependent variables
dependent_vars <- df[, c(
  "YearsWithCurrManager_log",
  "TotalWorkingYears_log",
  "YearsAtCompany_log"
)]
cor_matrix <- cor(x = dependent_vars, method = "pearson")
# Colour-coded correlation plot with the coefficients printed in each cell
corrplot(
  corr = cor_matrix,
  method = "color",
  addCoef.col = "black",
  tl.cex = 0.8,
  number.cex = 0.7
)
# Bar chart of the number of rows per Attrition level
ggplot(data = df, mapping = aes(x = Attrition)) +
  geom_bar(fill = "steelblue") +
  labs(title = "Distribusi Attrition")
# Bar chart of the number of rows per JobRole (roles on the y axis)
ggplot(data = df, mapping = aes(y = JobRole)) +
  geom_bar(fill = "darkgreen") +
  labs(title = "Distribusi JobRole")
# Numeric variables whose distributions are inspected
num_cols <- c("Age_log", "YearsWithCurrManager_log", "TotalWorkingYears_log", "YearsAtCompany_log")
# Draw one boxplot per numeric variable.
# The column is looked up by name through the `.data` pronoun, which replaces
# the deprecated `aes_string()`; the x-axis label is set explicitly so that it
# still shows the column name.
for (col in num_cols) {
  p <- ggplot(df, aes(x = .data[[col]])) +
    geom_boxplot(fill = "tomato") +
    labs(x = col) +
    ggtitle(paste("Boxplot", col)) +
    theme_minimal()
  # A ggplot object is not auto-printed inside a loop, so print it explicitly
  print(p)
}
# Auxiliary linear model fitted only so that VIF can be computed for the
# two factors.
# NOTE(review): the covariate Age_log used in the MANCOVA is not part of this
# model - confirm whether it should be included in the collinearity check.
model_vif <- lm(
  formula = YearsAtCompany_log ~ Attrition + JobRole,
  data = data_manova
)
# Variance inflation factors of the predictors
car::vif(model_vif)
## GVIF Df GVIF^(1/(2*Df))
## Attrition 1.053785 1 1.02654
## JobRole 1.053785 8 1.00328
# Extract the three dependent variables
y_vars <- data_manova[c(
  "YearsWithCurrManager_log",
  "TotalWorkingYears_log",
  "YearsAtCompany_log"
)]
# Multivariate normality check using the Royston test
mvn(y_vars, mvnTest = "royston")
## $multivariateNormality
## Test H p value MVN
## 1 Royston 286.984 3.596275e-62 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling YearsWithCurrManager_log 41.2963 <0.001 NO
## 2 Anderson-Darling TotalWorkingYears_log 30.2756 <0.001 NO
## 3 Anderson-Darling YearsAtCompany_log 19.0728 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th
## YearsWithCurrManager_log 1253 1.262275 0.7765269 1.098612 0 2.708050 1.098612
## TotalWorkingYears_log 1253 2.123745 0.6082434 2.302585 0 3.091042 1.791759
## YearsAtCompany_log 1253 1.682448 0.6692273 1.791759 0 2.890372 1.098612
## 75th Skew Kurtosis
## YearsWithCurrManager_log 2.079442 -0.3306121 -0.9572467
## TotalWorkingYears_log 2.484907 -1.0514642 1.0581523
## YearsAtCompany_log 2.197225 -0.5473811 -0.3451232
# Box's M test of the covariance matrices of y_vars across Attrition groups
boxM(data = y_vars, grouping = data_manova[["Attrition"]])
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: y_vars
## Chi-Sq (approx.) = 38.684, df = 6, p-value = 8.256e-07
# Scatterplot matrix with smoothers to inspect linearity between the responses
pairs(
  x = y_vars,
  panel = panel.smooth,
  main = "Scatterplot Matrix - Linearitas"
)
# Fit the MANCOVA: three responses, two factors and Age_log as covariate
mancova_model <- manova(
  formula = cbind(
    YearsWithCurrManager_log,
    TotalWorkingYears_log,
    YearsAtCompany_log
  ) ~ Attrition + JobRole + Age_log,
  data = data_manova
)
# Multivariate tests using Wilks' lambda.
# NOTE(review): summary.manova presumably tests terms sequentially, so the
# order Attrition, JobRole, Age_log may affect the results - confirm this
# ordering is intended.
summary(mancova_model, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Attrition 1 0.91006 40.849 3 1240 < 2.2e-16 ***
## JobRole 8 0.72204 17.811 24 3597 < 2.2e-16 ***
## Age_log 1 0.69994 177.193 3 1240 < 2.2e-16 ***
## Residuals 1242
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Univariate follow-up models, one per response, with JobRole and Age_log.
# NOTE(review): these models leave out Attrition although mancova_model
# includes it - confirm this is intended, otherwise the estimated marginal
# means below are not adjusted for Attrition.
mod_mgr <- lm(
  formula = YearsWithCurrManager_log ~ JobRole + Age_log,
  data = data_manova
)
mod_work <- lm(
  formula = TotalWorkingYears_log ~ JobRole + Age_log,
  data = data_manova
)
mod_company <- lm(
  formula = YearsAtCompany_log ~ JobRole + Age_log,
  data = data_manova
)
# Estimated marginal means of YearsWithCurrManager_log per JobRole and all
# pairwise contrasts with Tukey adjustment
emmeans(mod_mgr, specs = pairwise ~ JobRole, adjust = "tukey")
## $emmeans
## JobRole emmean SE df lower.CL upper.CL
## Healthcare Representative 1.336 0.0730 1243 1.193 1.479
## Human Resources 1.186 0.1050 1243 0.980 1.391
## Laboratory Technician 1.133 0.0473 1243 1.040 1.226
## Manager 1.232 0.1680 1243 0.902 1.562
## Manufacturing Director 1.498 0.0679 1243 1.364 1.631
## Research Director 1.611 0.1270 1243 1.362 1.860
## Research Scientist 1.173 0.0444 1243 1.086 1.260
## Sales Executive 1.444 0.0436 1243 1.358 1.529
## Sales Representative 0.779 0.0835 1243 0.616 0.943
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df
## Healthcare Representative - Human Resources 0.1506 0.1280 1243
## Healthcare Representative - Laboratory Technician 0.2029 0.0873 1243
## Healthcare Representative - Manager 0.1043 0.1830 1243
## Healthcare Representative - Manufacturing Director -0.1614 0.0993 1243
## Healthcare Representative - Research Director -0.2748 0.1460 1243
## Healthcare Representative - Research Scientist 0.1633 0.0857 1243
## Healthcare Representative - Sales Executive -0.1073 0.0847 1243
## Healthcare Representative - Sales Representative 0.5568 0.1120 1243
## Human Resources - Laboratory Technician 0.0523 0.1150 1243
## Human Resources - Manager -0.0463 0.1980 1243
## Human Resources - Manufacturing Director -0.3120 0.1250 1243
## Human Resources - Research Director -0.4254 0.1650 1243
## Human Resources - Research Scientist 0.0126 0.1140 1243
## Human Resources - Sales Executive -0.2580 0.1140 1243
## Human Resources - Sales Representative 0.4062 0.1340 1243
## Laboratory Technician - Manager -0.0986 0.1750 1243
## Laboratory Technician - Manufacturing Director -0.3643 0.0829 1243
## Laboratory Technician - Research Director -0.4777 0.1360 1243
## Laboratory Technician - Research Scientist -0.0396 0.0647 1243
## Laboratory Technician - Sales Executive -0.3102 0.0645 1243
## Laboratory Technician - Sales Representative 0.3539 0.0954 1243
## Manager - Manufacturing Director -0.2657 0.1810 1243
## Manager - Research Director -0.3791 0.2100 1243
## Manager - Research Scientist 0.0590 0.1740 1243
## Manager - Sales Executive -0.2116 0.1740 1243
## Manager - Sales Representative 0.4525 0.1890 1243
## Manufacturing Director - Research Director -0.1134 0.1440 1243
## Manufacturing Director - Research Scientist 0.3247 0.0813 1243
## Manufacturing Director - Sales Executive 0.0541 0.0805 1243
## Manufacturing Director - Sales Representative 0.7182 0.1080 1243
## Research Director - Research Scientist 0.4381 0.1350 1243
## Research Director - Sales Executive 0.1675 0.1340 1243
## Research Director - Sales Representative 0.8316 0.1530 1243
## Research Scientist - Sales Executive -0.2706 0.0623 1243
## Research Scientist - Sales Representative 0.3935 0.0941 1243
## Sales Executive - Sales Representative 0.6642 0.0946 1243
## t.ratio p.value
## 1.179 0.9608
## 2.323 0.3288
## 0.571 0.9997
## -1.625 0.7911
## -1.884 0.6248
## 1.905 0.6105
## -1.268 0.9405
## 4.971 <.0001
## 0.454 1.0000
## -0.234 1.0000
## -2.498 0.2338
## -2.584 0.1942
## 0.111 1.0000
## -2.272 0.3601
## 3.028 0.0630
## -0.563 0.9998
## -4.393 0.0004
## -3.519 0.0133
## -0.612 0.9995
## -4.812 0.0001
## 3.710 0.0067
## -1.467 0.8706
## -1.805 0.6790
## 0.338 1.0000
## -1.220 0.9523
## 2.394 0.2880
## -0.789 0.9972
## 3.995 0.0022
## 0.672 0.9991
## 6.644 <.0001
## 3.252 0.0321
## 1.250 0.9451
## 5.441 <.0001
## -4.342 0.0005
## 4.183 0.0010
## 7.022 <.0001
##
## P value adjustment: tukey method for comparing a family of 9 estimates
# Estimated marginal means of TotalWorkingYears_log per JobRole and all
# pairwise contrasts with Tukey adjustment
emmeans(mod_work, specs = pairwise ~ JobRole, adjust = "tukey")
## $emmeans
## JobRole emmean SE df lower.CL upper.CL
## Healthcare Representative 2.36 0.0446 1243 2.27 2.45
## Human Resources 2.03 0.0641 1243 1.91 2.16
## Laboratory Technician 1.98 0.0289 1243 1.92 2.03
## Manager 2.54 0.1030 1243 2.34 2.75
## Manufacturing Director 2.31 0.0415 1243 2.23 2.39
## Research Director 2.56 0.0776 1243 2.41 2.72
## Research Scientist 2.00 0.0271 1243 1.95 2.05
## Sales Executive 2.28 0.0266 1243 2.23 2.33
## Sales Representative 1.62 0.0510 1243 1.52 1.72
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df
## Healthcare Representative - Human Resources 0.3253 0.0780 1243
## Healthcare Representative - Laboratory Technician 0.3810 0.0534 1243
## Healthcare Representative - Manager -0.1858 0.1120 1243
## Healthcare Representative - Manufacturing Director 0.0455 0.0607 1243
## Healthcare Representative - Research Director -0.2047 0.0891 1243
## Healthcare Representative - Research Scientist 0.3584 0.0524 1243
## Healthcare Representative - Sales Executive 0.0791 0.0517 1243
## Healthcare Representative - Sales Representative 0.7348 0.0685 1243
## Human Resources - Laboratory Technician 0.0557 0.0703 1243
## Human Resources - Manager -0.5110 0.1210 1243
## Human Resources - Manufacturing Director -0.2798 0.0763 1243
## Human Resources - Research Director -0.5300 0.1010 1243
## Human Resources - Research Scientist 0.0331 0.0696 1243
## Human Resources - Sales Executive -0.2461 0.0694 1243
## Human Resources - Sales Representative 0.4095 0.0820 1243
## Laboratory Technician - Manager -0.5668 0.1070 1243
## Laboratory Technician - Manufacturing Director -0.3355 0.0507 1243
## Laboratory Technician - Research Director -0.5857 0.0830 1243
## Laboratory Technician - Research Scientist -0.0226 0.0395 1243
## Laboratory Technician - Sales Executive -0.3019 0.0394 1243
## Laboratory Technician - Sales Representative 0.3538 0.0583 1243
## Manager - Manufacturing Director 0.2313 0.1110 1243
## Manager - Research Director -0.0189 0.1280 1243
## Manager - Research Scientist 0.5442 0.1060 1243
## Manager - Sales Executive 0.2649 0.1060 1243
## Manager - Sales Representative 0.9206 0.1160 1243
## Manufacturing Director - Research Director -0.2502 0.0878 1243
## Manufacturing Director - Research Scientist 0.3129 0.0497 1243
## Manufacturing Director - Sales Executive 0.0336 0.0492 1243
## Manufacturing Director - Sales Representative 0.6893 0.0661 1243
## Research Director - Research Scientist 0.5631 0.0823 1243
## Research Director - Sales Executive 0.2839 0.0819 1243
## Research Director - Sales Representative 0.9395 0.0934 1243
## Research Scientist - Sales Executive -0.2792 0.0381 1243
## Research Scientist - Sales Representative 0.3764 0.0575 1243
## Sales Executive - Sales Representative 0.6557 0.0578 1243
## t.ratio p.value
## 4.168 0.0011
## 7.138 <.0001
## -1.665 0.7679
## 0.749 0.9980
## -2.297 0.3446
## 6.842 <.0001
## 1.529 0.8419
## 10.734 <.0001
## 0.792 0.9971
## -4.220 0.0009
## -3.665 0.0079
## -5.268 <.0001
## 0.476 0.9999
## -3.547 0.0120
## 4.995 <.0001
## -5.297 <.0001
## -6.619 <.0001
## -7.060 <.0001
## -0.572 0.9997
## -7.661 <.0001
## 6.069 <.0001
## 2.090 0.4808
## -0.148 1.0000
## 5.110 <.0001
## 2.498 0.2337
## 7.969 <.0001
## -2.849 0.1026
## 6.300 <.0001
## 0.683 0.9990
## 10.434 <.0001
## 6.840 <.0001
## 3.466 0.0160
## 10.059 <.0001
## -7.332 <.0001
## 6.547 <.0001
## 11.344 <.0001
##
## P value adjustment: tukey method for comparing a family of 9 estimates
# Estimated marginal means of YearsAtCompany_log per JobRole and all
# pairwise contrasts with Tukey adjustment
emmeans(mod_company, specs = pairwise ~ JobRole, adjust = "tukey")
## $emmeans
## JobRole emmean SE df lower.CL upper.CL
## Healthcare Representative 1.81 0.0618 1243 1.69 1.93
## Human Resources 1.66 0.0888 1243 1.49 1.83
## Laboratory Technician 1.54 0.0401 1243 1.46 1.61
## Manager 1.74 0.1420 1243 1.47 2.02
## Manufacturing Director 1.86 0.0575 1243 1.75 1.97
## Research Director 2.00 0.1070 1243 1.79 2.21
## Research Scientist 1.59 0.0376 1243 1.52 1.66
## Sales Executive 1.87 0.0369 1243 1.80 1.94
## Sales Representative 1.22 0.0707 1243 1.08 1.36
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df
## Healthcare Representative - Human Resources 0.15243 0.1080 1243
## Healthcare Representative - Laboratory Technician 0.27756 0.0739 1243
## Healthcare Representative - Manager 0.06808 0.1550 1243
## Healthcare Representative - Manufacturing Director -0.04811 0.0841 1243
## Healthcare Representative - Research Director -0.18890 0.1230 1243
## Healthcare Representative - Research Scientist 0.22258 0.0726 1243
## Healthcare Representative - Sales Executive -0.05486 0.0717 1243
## Healthcare Representative - Sales Representative 0.59625 0.0948 1243
## Human Resources - Laboratory Technician 0.12514 0.0974 1243
## Human Resources - Manager -0.08435 0.1680 1243
## Human Resources - Manufacturing Director -0.20054 0.1060 1243
## Human Resources - Research Director -0.34133 0.1390 1243
## Human Resources - Research Scientist 0.07016 0.0964 1243
## Human Resources - Sales Executive -0.20729 0.0961 1243
## Human Resources - Sales Representative 0.44382 0.1140 1243
## Laboratory Technician - Manager -0.20949 0.1480 1243
## Laboratory Technician - Manufacturing Director -0.32568 0.0702 1243
## Laboratory Technician - Research Director -0.46647 0.1150 1243
## Laboratory Technician - Research Scientist -0.05498 0.0548 1243
## Laboratory Technician - Sales Executive -0.33243 0.0546 1243
## Laboratory Technician - Sales Representative 0.31868 0.0808 1243
## Manager - Manufacturing Director -0.11619 0.1530 1243
## Manager - Research Director -0.25698 0.1780 1243
## Manager - Research Scientist 0.15451 0.1480 1243
## Manager - Sales Executive -0.12294 0.1470 1243
## Manager - Sales Representative 0.52817 0.1600 1243
## Manufacturing Director - Research Director -0.14079 0.1220 1243
## Manufacturing Director - Research Scientist 0.27070 0.0688 1243
## Manufacturing Director - Sales Executive -0.00675 0.0682 1243
## Manufacturing Director - Sales Representative 0.64436 0.0915 1243
## Research Director - Research Scientist 0.41149 0.1140 1243
## Research Director - Sales Executive 0.13404 0.1130 1243
## Research Director - Sales Representative 0.78515 0.1290 1243
## Research Scientist - Sales Executive -0.27745 0.0528 1243
## Research Scientist - Sales Representative 0.37367 0.0796 1243
## Sales Executive - Sales Representative 0.65111 0.0801 1243
## t.ratio p.value
## 1.410 0.8941
## 3.754 0.0057
## 0.440 1.0000
## -0.572 0.9997
## -1.530 0.8414
## 3.068 0.0561
## -0.765 0.9977
## 6.288 <.0001
## 1.284 0.9359
## -0.503 0.9999
## -1.896 0.6164
## -2.449 0.2583
## 0.728 0.9984
## -2.157 0.4350
## 3.908 0.0031
## -1.413 0.8929
## -4.639 0.0001
## -4.059 0.0017
## -1.004 0.9856
## -6.091 <.0001
## 3.946 0.0027
## -0.758 0.9979
## -1.445 0.8800
## 1.047 0.9811
## -0.837 0.9957
## 3.301 0.0275
## -1.157 0.9650
## 3.935 0.0028
## -0.099 1.0000
## 7.042 <.0001
## 3.609 0.0097
## 1.181 0.9604
## 6.069 <.0001
## -5.259 <.0001
## 4.692 0.0001
## 8.133 <.0001
##
## P value adjustment: tukey method for comparing a family of 9 estimates