Dataset –> Student Placement Link Dataset –> https://www.kaggle.com/datasets/sonalshinde123/student-placement-dataset
X1 = Age X2 = CGPA X3 = Internship X4 = Projects X5 = Coding_Skills X6 = Communication_Skills X7 = Aptitude_Test_Score X8 = Soft_Skills_Rating X9 = Certifications X10 = Backlogs
library(knitr)
library(rmarkdown)
library(prettydoc)
library(equatiomatic)
##
## Attaching package: 'equatiomatic'
## The following object is masked from 'package:datasets':
##
## penguins
library(DT)
library(psych)
library(REdaS)
## Loading required package: grid
library(corrplot)
## corrplot 0.95 loaded
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(factoextra)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library('FactoMineR')
data <- read.csv("D:/BIODATA KAFKA/Sem 4/Analisis Multivariat/Student Placement.csv", header = TRUE, sep =",")
summary(data)
## Student_ID Age Gender Degree
## Min. : 12 Min. :18.00 Length:5000 Length:5000
## 1st Qu.:12424 1st Qu.:19.00 Class :character Class :character
## Median :25469 Median :21.00 Mode :character Mode :character
## Mean :25203 Mean :21.01
## 3rd Qu.:37718 3rd Qu.:23.00
## Max. :49999 Max. :24.00
## Branch CGPA Internships Projects
## Length:5000 Min. :4.500 Min. :0.0000 Min. :1.000
## Class :character 1st Qu.:6.330 1st Qu.:0.0000 1st Qu.:3.000
## Mode :character Median :7.010 Median :1.0000 Median :4.000
## Mean :7.005 Mean :0.7706 Mean :3.749
## 3rd Qu.:7.690 3rd Qu.:1.0000 3rd Qu.:4.000
## Max. :9.800 Max. :3.0000 Max. :6.000
## Coding_Skills Communication_Skills Aptitude_Test_Score Soft_Skills_Rating
## Min. : 1.000 Min. : 1.000 Min. : 35.00 Min. : 1.00
## 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 60.00 1st Qu.: 5.00
## Median : 6.000 Median : 5.000 Median : 70.00 Median : 6.00
## Mean : 5.721 Mean : 5.484 Mean : 69.52 Mean : 5.51
## 3rd Qu.: 7.000 3rd Qu.: 7.000 3rd Qu.: 79.00 3rd Qu.: 6.00
## Max. :10.000 Max. :10.000 Max. :100.00 Max. :10.00
## Certifications Backlogs Placement_Status
## Min. :0.000 Min. :0.0000 Length:5000
## 1st Qu.:1.000 1st Qu.:0.0000 Class :character
## Median :2.000 Median :1.0000 Mode :character
## Mean :1.807 Mean :0.8698
## 3rd Qu.:2.000 3rd Qu.:2.0000
## Max. :3.000 Max. :3.0000
data_numerik <- data[, !(names(data) %in% c("Student_ID", "Gender", "Degree", "Branch", "Placement_Status"))]
summary(data_numerik)
## Age CGPA Internships Projects
## Min. :18.00 Min. :4.500 Min. :0.0000 Min. :1.000
## 1st Qu.:19.00 1st Qu.:6.330 1st Qu.:0.0000 1st Qu.:3.000
## Median :21.00 Median :7.010 Median :1.0000 Median :4.000
## Mean :21.01 Mean :7.005 Mean :0.7706 Mean :3.749
## 3rd Qu.:23.00 3rd Qu.:7.690 3rd Qu.:1.0000 3rd Qu.:4.000
## Max. :24.00 Max. :9.800 Max. :3.0000 Max. :6.000
## Coding_Skills Communication_Skills Aptitude_Test_Score Soft_Skills_Rating
## Min. : 1.000 Min. : 1.000 Min. : 35.00 Min. : 1.00
## 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 60.00 1st Qu.: 5.00
## Median : 6.000 Median : 5.000 Median : 70.00 Median : 6.00
## Mean : 5.721 Mean : 5.484 Mean : 69.52 Mean : 5.51
## 3rd Qu.: 7.000 3rd Qu.: 7.000 3rd Qu.: 79.00 3rd Qu.: 6.00
## Max. :10.000 Max. :10.000 Max. :100.00 Max. :10.00
## Certifications Backlogs
## Min. :0.000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.0000
## Median :2.000 Median :1.0000
## Mean :1.807 Mean :0.8698
## 3rd Qu.:2.000 3rd Qu.:2.0000
## Max. :3.000 Max. :3.0000
cor(data_numerik)
## Age CGPA Internships Projects
## Age 1.000000000 0.01101079 0.009868339 0.008996816
## CGPA 0.011010795 1.00000000 0.373514922 0.700337915
## Internships 0.009868339 0.37351492 1.000000000 0.522191630
## Projects 0.008996816 0.70033792 0.522191630 1.000000000
## Coding_Skills 0.017931178 0.46758718 0.537636462 0.894409152
## Communication_Skills -0.002467067 -0.01133818 -0.007554366 0.001947031
## Aptitude_Test_Score 0.004893432 0.70182019 0.273366821 0.500845369
## Soft_Skills_Rating -0.009540773 -0.03585377 -0.023266676 -0.026705362
## Certifications 0.008630309 0.54758873 0.477900513 0.827602624
## Backlogs -0.018190546 -0.58427076 -0.220171779 -0.416125053
## Coding_Skills Communication_Skills Aptitude_Test_Score
## Age 0.017931178 -0.002467067 0.004893432
## CGPA 0.467587175 -0.011338179 0.701820191
## Internships 0.537636462 -0.007554366 0.273366821
## Projects 0.894409152 0.001947031 0.500845369
## Coding_Skills 1.000000000 0.004976343 0.337021335
## Communication_Skills 0.004976343 1.000000000 -0.008423186
## Aptitude_Test_Score 0.337021335 -0.008423186 1.000000000
## Soft_Skills_Rating -0.022127375 0.005992770 -0.027359018
## Certifications 0.858133397 -0.001136872 0.528736700
## Backlogs -0.289973406 -0.003013710 -0.409167795
## Soft_Skills_Rating Certifications Backlogs
## Age -0.009540773 0.008630309 -0.018190546
## CGPA -0.035853772 0.547588730 -0.584270759
## Internships -0.023266676 0.477900513 -0.220171779
## Projects -0.026705362 0.827602624 -0.416125053
## Coding_Skills -0.022127375 0.858133397 -0.289973406
## Communication_Skills 0.005992770 -0.001136872 -0.003013710
## Aptitude_Test_Score -0.027359018 0.528736700 -0.409167795
## Soft_Skills_Rating 1.000000000 -0.024712075 0.005512266
## Certifications -0.024712075 1.000000000 -0.328503983
## Backlogs 0.005512266 -0.328503983 1.000000000
corrplot(cor(data_numerik))
eigen(cor(data_numerik))
## eigen() decomposition
## $values
## [1] 4.18119960 1.12555352 1.01144173 0.99817547 0.99006213 0.63619131
## [7] 0.58413171 0.29097113 0.12893176 0.05334163
##
## $vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.009258134 0.01770335 0.581666047 5.883050e-01 0.560483019
## [2,] -0.395577131 0.39652918 -0.013096292 6.843211e-05 -0.012557841
## [3,] -0.303524799 -0.32932746 0.028077447 -2.213220e-02 0.010181367
## [4,] -0.454084731 -0.17274258 -0.009277584 -4.173397e-03 0.004870011
## [5,] -0.413736063 -0.41136733 0.004844642 -8.920826e-04 0.017582199
## [6,] 0.002029160 -0.05035715 -0.472849650 8.048772e-01 -0.353524591
## [7,] -0.335821190 0.43510050 -0.019134184 -7.665428e-03 -0.022181226
## [8,] 0.020095922 -0.01740675 -0.659293992 -6.150139e-02 0.747795496
## [9,] -0.428159633 -0.24150677 -0.004689881 -1.088150e-02 0.005486329
## [10,] 0.278263791 -0.53334298 0.044178475 -4.002925e-02 -0.023764020
## [,6] [,7] [,8] [,9] [,10]
## [1,] -0.022911973 0.02077937 0.002629553 -0.002394337 -0.0072200776
## [2,] -0.018067191 0.13464608 0.689022580 -0.348789757 0.2666303186
## [3,] 0.729620313 0.50857954 -0.068313948 -0.034354664 -0.0344716673
## [4,] -0.125527074 -0.17729244 0.329687327 0.393596759 -0.6730810145
## [5,] -0.121093803 -0.27570860 -0.031872693 0.348294329 0.6680286218
## [6,] 0.004029167 0.03144887 0.005437373 -0.005539157 0.0009131115
## [7,] -0.346158602 0.54947570 -0.404074736 0.333109675 0.0317336768
## [8,] -0.010911620 0.03674201 0.012572021 -0.003411459 0.0023094587
## [9,] -0.291539218 -0.10390157 -0.384287838 -0.698458669 -0.1639237658
## [10,] -0.481067428 0.54797772 0.315790146 -0.045645293 0.0219854865
KMOS(data_numerik)
##
## Kaiser-Meyer-Olkin Statistics
##
## Call: KMOS(x = data_numerik)
##
## Measures of Sampling Adequacy (MSA):
## Age CGPA Internships
## 0.3466923 0.7174396 0.9546544
## Projects Coding_Skills Communication_Skills
## 0.7581822 0.6908664 0.3464423
## Aptitude_Test_Score Soft_Skills_Rating Certifications
## 0.7745864 0.7821473 0.8425321
## Backlogs
## 0.8736235
##
## KMO-Criterion: 0.7747372
bart_spher(data_numerik)
## Bartlett's Test of Sphericity
##
## Call: bart_spher(x = data_numerik)
##
## X2 = 28248.231
## df = 45
## p-value < 2.22e-16
pca <- prcomp(data_numerik, center = TRUE, scale. = TRUE)
pca
## Standard deviations (1, .., p=10):
## [1] 2.0447982 1.0609211 1.0057046 0.9990873 0.9950187 0.7976160 0.7642851
## [8] 0.5394174 0.3590707 0.2309581
##
## Rotation (n x k) = (10 x 10):
## PC1 PC2 PC3 PC4
## Age -0.009258134 0.01770335 0.581666047 -5.883050e-01
## CGPA -0.395577131 0.39652918 -0.013096292 -6.843211e-05
## Internships -0.303524799 -0.32932746 0.028077447 2.213220e-02
## Projects -0.454084731 -0.17274258 -0.009277584 4.173397e-03
## Coding_Skills -0.413736063 -0.41136733 0.004844642 8.920826e-04
## Communication_Skills 0.002029160 -0.05035715 -0.472849650 -8.048772e-01
## Aptitude_Test_Score -0.335821190 0.43510050 -0.019134184 7.665428e-03
## Soft_Skills_Rating 0.020095922 -0.01740675 -0.659293992 6.150139e-02
## Certifications -0.428159633 -0.24150677 -0.004689881 1.088150e-02
## Backlogs 0.278263791 -0.53334298 0.044178475 4.002925e-02
## PC5 PC6 PC7 PC8
## Age -0.560483019 -0.022911973 -0.02077937 0.002629553
## CGPA 0.012557841 -0.018067191 -0.13464608 0.689022580
## Internships -0.010181367 0.729620313 -0.50857954 -0.068313948
## Projects -0.004870011 -0.125527074 0.17729244 0.329687327
## Coding_Skills -0.017582199 -0.121093803 0.27570860 -0.031872693
## Communication_Skills 0.353524591 0.004029167 -0.03144887 0.005437373
## Aptitude_Test_Score 0.022181226 -0.346158602 -0.54947570 -0.404074736
## Soft_Skills_Rating -0.747795496 -0.010911620 -0.03674201 0.012572021
## Certifications -0.005486329 -0.291539218 0.10390157 -0.384287838
## Backlogs 0.023764020 -0.481067428 -0.54797772 0.315790146
## PC9 PC10
## Age 0.002394337 0.0072200776
## CGPA 0.348789757 -0.2666303186
## Internships 0.034354664 0.0344716673
## Projects -0.393596759 0.6730810145
## Coding_Skills -0.348294329 -0.6680286218
## Communication_Skills 0.005539157 -0.0009131115
## Aptitude_Test_Score -0.333109675 -0.0317336768
## Soft_Skills_Rating 0.003411459 -0.0023094587
## Certifications 0.698458669 0.1639237658
## Backlogs 0.045645293 -0.0219854865
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.0448 1.0609 1.0057 0.99909 0.99502 0.79762 0.76429
## Proportion of Variance 0.4181 0.1126 0.1011 0.09982 0.09901 0.06362 0.05841
## Cumulative Proportion 0.4181 0.5307 0.6318 0.73164 0.83064 0.89426 0.95268
## PC8 PC9 PC10
## Standard deviation 0.5394 0.35907 0.23096
## Proportion of Variance 0.0291 0.01289 0.00533
## Cumulative Proportion 0.9818 0.99467 1.00000
pc_pri_non <- principal(data_numerik, nfactors = ncol(data_numerik), rotate = "none")
pc_pri_non
## Principal Components Analysis
## Call: principal(r = data_numerik, nfactors = ncol(data_numerik), rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9
## Age 0.02 -0.02 -0.58 0.59 0.56 0.02 0.02 0.00 0.00
## CGPA 0.81 -0.42 0.01 0.00 -0.01 0.01 0.10 0.37 0.13
## Internships 0.62 0.35 -0.03 -0.02 0.01 -0.58 0.39 -0.04 0.01
## Projects 0.93 0.18 0.01 0.00 0.00 0.10 -0.14 0.18 -0.14
## Coding_Skills 0.85 0.44 0.00 0.00 0.02 0.10 -0.21 -0.02 -0.13
## Communication_Skills 0.00 0.05 0.48 0.80 -0.35 0.00 0.02 0.00 0.00
## Aptitude_Test_Score 0.69 -0.46 0.02 -0.01 -0.02 0.28 0.42 -0.22 -0.12
## Soft_Skills_Rating -0.04 0.02 0.66 -0.06 0.74 0.01 0.03 0.01 0.00
## Certifications 0.88 0.26 0.00 -0.01 0.01 0.23 -0.08 -0.21 0.25
## Backlogs -0.57 0.57 -0.04 -0.04 -0.02 0.38 0.42 0.17 0.02
## PC10 h2 u2 com
## Age 0.00 1 -2.2e-16 3.0
## CGPA 0.06 1 3.1e-15 2.1
## Internships -0.01 1 -4.4e-16 3.3
## Projects -0.16 1 3.1e-15 1.3
## Coding_Skills 0.15 1 1.7e-15 1.8
## Communication_Skills 0.00 1 0.0e+00 2.1
## Aptitude_Test_Score 0.01 1 2.3e-15 3.3
## Soft_Skills_Rating 0.00 1 1.0e-15 2.0
## Certifications -0.04 1 1.8e-15 1.7
## Backlogs 0.01 1 8.9e-16 3.8
##
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## SS loadings 4.18 1.13 1.01 1.00 0.99 0.64 0.58 0.29 0.13 0.05
## Proportion Var 0.42 0.11 0.10 0.10 0.10 0.06 0.06 0.03 0.01 0.01
## Cumulative Var 0.42 0.53 0.63 0.73 0.83 0.89 0.95 0.98 0.99 1.00
## Proportion Explained 0.42 0.11 0.10 0.10 0.10 0.06 0.06 0.03 0.01 0.01
## Cumulative Proportion 0.42 0.53 0.63 0.73 0.83 0.89 0.95 0.98 0.99 1.00
##
## Mean item complexity = 2.4
## Test of the hypothesis that 10 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0
## with the empirical chi square 0 with prob < NA
##
## Fit based upon off diagonal values = 1
pc_pri <- principal(data_numerik, nfactors = 3, rotate = "none")
pc_pri
## Principal Components Analysis
## Call: principal(r = data_numerik, nfactors = 3, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 h2 u2 com
## Age 0.02 -0.02 -0.58 0.34 0.657 1.0
## CGPA 0.81 -0.42 0.01 0.83 0.169 1.5
## Internships 0.62 0.35 -0.03 0.51 0.492 1.6
## Projects 0.93 0.18 0.01 0.90 0.104 1.1
## Coding_Skills 0.85 0.44 0.00 0.91 0.094 1.5
## Communication_Skills 0.00 0.05 0.48 0.23 0.771 1.0
## Aptitude_Test_Score 0.69 -0.46 0.02 0.68 0.315 1.8
## Soft_Skills_Rating -0.04 0.02 0.66 0.44 0.558 1.0
## Certifications 0.88 0.26 0.00 0.83 0.168 1.2
## Backlogs -0.57 0.57 -0.04 0.65 0.354 2.0
##
## PC1 PC2 PC3
## SS loadings 4.18 1.13 1.01
## Proportion Var 0.42 0.11 0.10
## Cumulative Var 0.42 0.53 0.63
## Proportion Explained 0.66 0.18 0.16
## Cumulative Proportion 0.66 0.84 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 2364.78 with prob < 0
##
## Fit based upon off diagonal values = 0.96
pca_result <- PCA(data_numerik,
scale.unit = FALSE, # karena sudah di-scale
graph = FALSE,
ncp = ncol(data_numerik))
pca_result$eig
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 194.10862026 93.18112175 93.18112
## comp 2 4.53716297 2.17804822 95.35917
## comp 3 3.97031712 1.90593598 97.26511
## comp 4 2.32444172 1.11583961 98.38095
## comp 5 1.54377845 0.74108511 99.12203
## comp 6 0.87014919 0.41771188 99.53974
## comp 7 0.48812461 0.23432240 99.77406
## comp 8 0.32570236 0.15635221 99.93042
## comp 9 0.07883831 0.03784604 99.96826
## comp 10 0.06611196 0.03173680 100.00000
fviz_eig(pca,
addlabels = TRUE,
ncp = ncol(data_numerik),
barfill = "skyblue",
barcolor = "darkblue",
linecolor = "red")
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.
fviz_pca_biplot(pca_result,
geom.ind = "point",
#col.ind = status.ipm,
#palette = c("#FC4E07","#E7B800", "#00AFBB"),
addEllipses = TRUE,)
contrib_circle <- fviz_pca_var(pca_result, col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE) +
ggtitle("correlation circle")
plot(contrib_circle)
contrib_v_PC1 <- fviz_contrib(pca_result, choice = "var", axes = 1, top = 5) + ggtitle("PC1")
plot(contrib_v_PC1)
contrib_v_PC2 <- fviz_contrib(pca_result, choice = "var", axes = 2, top = 5) + ggtitle("PC2")
plot(contrib_v_PC2)
contrib_v_PC3 <- fviz_contrib(pca_result, choice = "var", axes = 3, top = 5) + ggtitle("PC3")
plot(contrib_v_PC3)
fa <- fa(cor(data_numerik), nfactors = 3, rotate = "varimax")
fa
## Factor Analysis using method = minres
## Call: fa(r = cor(data_numerik), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 MR3 h2 u2 com
## Age 0.01 0.00 0.02 0.00034 0.9997 1.6
## CGPA 0.34 0.61 0.72 0.99639 0.0036 2.4
## Internships 0.50 0.08 0.22 0.30409 0.6959 1.4
## Projects 0.83 0.18 0.43 0.90560 0.0944 1.6
## Coding_Skills 0.97 -0.06 0.24 0.99597 0.0040 1.1
## Communication_Skills 0.00 -0.01 0.00 0.00013 0.9999 1.3
## Aptitude_Test_Score 0.40 0.91 0.02 0.99501 0.0050 1.4
## Soft_Skills_Rating -0.02 -0.02 -0.01 0.00109 0.9989 2.7
## Certifications 0.85 0.20 0.19 0.80334 0.1967 1.2
## Backlogs -0.21 -0.35 -0.42 0.34152 0.6585 2.4
##
## MR1 MR2 MR3
## SS loadings 2.92 1.41 1.01
## Proportion Var 0.29 0.14 0.10
## Cumulative Var 0.29 0.43 0.53
## Proportion Explained 0.55 0.26 0.19
## Cumulative Proportion 0.55 0.81 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 45 with the objective function = 5.66
## df of the model are 18 and the objective function was 0.01
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is 0
##
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2 MR3
## Correlation of (regression) scores with factors 1.00 1.00 0.99
## Multiple R square of scores with factors 0.99 0.99 0.99
## Minimum correlation of possible factor scores 0.99 0.99 0.98
loads=fa$loadings
fa.diagram(fa)