(Tuliskan latar belakang di sini…)
Dataset yang digunakan dalam penelitian ini berasal dari [https://archive.ics.uci.edu/dataset/697/predict+students+dropout+and+academic+success].
setwd("D:/analisis multivariat_smt4")
dataset <- read.csv("quantitative_features.csv")
| VARIABEL | NAMA FITUR | KETERANGAN |
|---|---|---|
| X1 | Previous qualification (grade) | Nilai pendidikan terakhir |
| X2 | Admission grade | Nilai masuk universitas mahasiswa |
| X3 | Age at enrollment | Usia mahasiswa saat pertama kali masuk kuliah |
| X4 | Curricular units 1st sem (credited) | Mata kuliah yang dikonversi (semester 1) |
| X5 | Curricular units 1st sem (enrolled) | Mata kuliah yang diambil (semester 1) |
| X6 | Curricular units 1st sem (evaluations) | Mata kuliah yang dievaluasi (semester 1) |
| X7 | Curricular units 1st sem (approved) | Mata kuliah yang lulus (semester 1) |
| X8 | Curricular units 1st sem (grade) | Nilai rata-rata mata kuliah (semester 1) |
| X9 | Curricular units 1st sem (without evaluations) | Mata kuliah tanpa evaluasi (semester 1) |
| X10 | Curricular units 2nd sem (credited) | Mata kuliah yang dikonversi (semester 2) |
| X11 | Curricular units 2nd sem (enrolled) | Mata kuliah yang diambil (semester 2) |
| X12 | Curricular units 2nd sem (evaluations) | Mata kuliah yang dievaluasi (semester 2) |
| X13 | Curricular units 2nd sem (approved) | Mata kuliah yang lulus (semester 2) |
| X14 | Curricular units 2nd sem (grade) | Nilai rata-rata mata kuliah (semester 2) |
| X15 | Curricular units 2nd sem (without evaluations) | Mata kuliah tanpa evaluasi (semester 2) |
| X16 | Unemployment rate | Tingkat pengangguran negara tempat mahasiswa kuliah |
| X17 | Inflation rate | Tingkat inflasi negara tempat mahasiswa kuliah |
| X18 | GDP | Produk domestik bruto (PDB) negara tempat mahasiswa kuliah |
str(dataset) # Melihat struktur data
## 'data.frame': 4424 obs. of 18 variables:
## $ Previous.qualification..grade. : num 122 160 122 122 100 ...
## $ Admission.grade : num 127 142 125 120 142 ...
## $ Age.at.enrollment : int 20 19 19 20 45 50 18 22 21 18 ...
## $ Curricular.units.1st.sem..credited. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Curricular.units.1st.sem..enrolled. : int 0 6 6 6 6 5 7 5 6 6 ...
## $ Curricular.units.1st.sem..evaluations. : int 0 6 0 8 9 10 9 5 8 9 ...
## $ Curricular.units.1st.sem..approved. : int 0 6 0 6 5 5 7 0 6 5 ...
## $ Curricular.units.1st.sem..grade. : num 0 14 0 13.4 12.3 ...
## $ Curricular.units.1st.sem..without.evaluations.: int 0 0 0 0 0 0 0 0 0 0 ...
## $ Curricular.units.2nd.sem..credited. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Curricular.units.2nd.sem..enrolled. : int 0 6 6 6 6 5 8 5 6 6 ...
## $ Curricular.units.2nd.sem..evaluations. : int 0 6 0 10 6 17 8 5 7 14 ...
## $ Curricular.units.2nd.sem..approved. : int 0 6 0 5 6 5 8 0 6 2 ...
## $ Curricular.units.2nd.sem..grade. : num 0 13.7 0 12.4 13 ...
## $ Curricular.units.2nd.sem..without.evaluations.: int 0 0 0 0 0 5 0 0 0 0 ...
## $ Unemployment.rate : num 10.8 13.9 10.8 9.4 13.9 16.2 15.5 15.5 16.2 8.9 ...
## $ Inflation.rate : num 1.4 -0.3 1.4 -0.8 -0.3 0.3 2.8 2.8 0.3 1.4 ...
## $ GDP : num 1.74 0.79 1.74 -3.12 0.79 -0.92 -4.06 -4.06 -0.92 3.51 ...
summary(dataset) # Statistik ringkasan
## Previous.qualification..grade. Admission.grade Age.at.enrollment
## Min. : 95.0 Min. : 95.0 Min. :17.00
## 1st Qu.:125.0 1st Qu.:117.9 1st Qu.:19.00
## Median :133.1 Median :126.1 Median :20.00
## Mean :132.6 Mean :127.0 Mean :23.27
## 3rd Qu.:140.0 3rd Qu.:134.8 3rd Qu.:25.00
## Max. :190.0 Max. :190.0 Max. :70.00
## Curricular.units.1st.sem..credited. Curricular.units.1st.sem..enrolled.
## Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 5.000
## Median : 0.00 Median : 6.000
## Mean : 0.71 Mean : 6.271
## 3rd Qu.: 0.00 3rd Qu.: 7.000
## Max. :20.00 Max. :26.000
## Curricular.units.1st.sem..evaluations. Curricular.units.1st.sem..approved.
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 6.000 1st Qu.: 3.000
## Median : 8.000 Median : 5.000
## Mean : 8.299 Mean : 4.707
## 3rd Qu.:10.000 3rd Qu.: 6.000
## Max. :45.000 Max. :26.000
## Curricular.units.1st.sem..grade.
## Min. : 0.00
## 1st Qu.:11.00
## Median :12.29
## Mean :10.64
## 3rd Qu.:13.40
## Max. :18.88
## Curricular.units.1st.sem..without.evaluations.
## Min. : 0.0000
## 1st Qu.: 0.0000
## Median : 0.0000
## Mean : 0.1377
## 3rd Qu.: 0.0000
## Max. :12.0000
## Curricular.units.2nd.sem..credited. Curricular.units.2nd.sem..enrolled.
## Min. : 0.0000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 5.000
## Median : 0.0000 Median : 6.000
## Mean : 0.5418 Mean : 6.232
## 3rd Qu.: 0.0000 3rd Qu.: 7.000
## Max. :19.0000 Max. :23.000
## Curricular.units.2nd.sem..evaluations. Curricular.units.2nd.sem..approved.
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 6.000 1st Qu.: 2.000
## Median : 8.000 Median : 5.000
## Mean : 8.063 Mean : 4.436
## 3rd Qu.:10.000 3rd Qu.: 6.000
## Max. :33.000 Max. :20.000
## Curricular.units.2nd.sem..grade.
## Min. : 0.00
## 1st Qu.:10.75
## Median :12.20
## Mean :10.23
## 3rd Qu.:13.33
## Max. :18.57
## Curricular.units.2nd.sem..without.evaluations. Unemployment.rate
## Min. : 0.0000 Min. : 7.60
## 1st Qu.: 0.0000 1st Qu.: 9.40
## Median : 0.0000 Median :11.10
## Mean : 0.1503 Mean :11.57
## 3rd Qu.: 0.0000 3rd Qu.:13.90
## Max. :12.0000 Max. :16.20
## Inflation.rate GDP
## Min. :-0.800 Min. :-4.060000
## 1st Qu.: 0.300 1st Qu.:-1.700000
## Median : 1.400 Median : 0.320000
## Mean : 1.228 Mean : 0.001969
## 3rd Qu.: 2.600 3rd Qu.: 1.790000
## Max. : 3.700 Max. : 3.510000
head(dataset) # Melihat beberapa baris pertama
## Previous.qualification..grade. Admission.grade Age.at.enrollment
## 1 122.0 127.3 20
## 2 160.0 142.5 19
## 3 122.0 124.8 19
## 4 122.0 119.6 20
## 5 100.0 141.5 45
## 6 133.1 114.8 50
## Curricular.units.1st.sem..credited. Curricular.units.1st.sem..enrolled.
## 1 0 0
## 2 0 6
## 3 0 6
## 4 0 6
## 5 0 6
## 6 0 5
## Curricular.units.1st.sem..evaluations. Curricular.units.1st.sem..approved.
## 1 0 0
## 2 6 6
## 3 0 0
## 4 8 6
## 5 9 5
## 6 10 5
## Curricular.units.1st.sem..grade.
## 1 0.00000
## 2 14.00000
## 3 0.00000
## 4 13.42857
## 5 12.33333
## 6 11.85714
## Curricular.units.1st.sem..without.evaluations.
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## Curricular.units.2nd.sem..credited. Curricular.units.2nd.sem..enrolled.
## 1 0 0
## 2 0 6
## 3 0 6
## 4 0 6
## 5 0 6
## 6 0 5
## Curricular.units.2nd.sem..evaluations. Curricular.units.2nd.sem..approved.
## 1 0 0
## 2 6 6
## 3 0 0
## 4 10 5
## 5 6 6
## 6 17 5
## Curricular.units.2nd.sem..grade.
## 1 0.00000
## 2 13.66667
## 3 0.00000
## 4 12.40000
## 5 13.00000
## 6 11.50000
## Curricular.units.2nd.sem..without.evaluations. Unemployment.rate
## 1 0 10.8
## 2 0 13.9
## 3 0 10.8
## 4 0 9.4
## 5 0 13.9
## 6 5 16.2
## Inflation.rate GDP
## 1 1.4 1.74
## 2 -0.3 0.79
## 3 1.4 1.74
## 4 -0.8 -3.12
## 5 -0.3 0.79
## 6 0.3 -0.92
boxplot(dataset, main = "Boxplot Data Awal", las = 2)
sum(is.na(dataset))
## [1] 0
data_scaled <- scale(dataset)
colnames(data_scaled) <- paste0("X", 1:ncol(data_scaled)) # Mengembalikan nama kolom setelah scaling
colnames(data_scaled)
## [1] "X1" "X2" "X3" "X4" "X5" "X6" "X7" "X8" "X9" "X10" "X11" "X12"
## [13] "X13" "X14" "X15" "X16" "X17" "X18"
correlation_matrix <- cor(data_scaled)
heatmap(correlation_matrix)
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
r <- cor(dataset)
KMO(r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA = 0.75
## MSA for each item =
## Previous.qualification..grade.
## 0.52
## Admission.grade
## 0.52
## Age.at.enrollment
## 0.86
## Curricular.units.1st.sem..credited.
## 0.72
## Curricular.units.1st.sem..enrolled.
## 0.74
## Curricular.units.1st.sem..evaluations.
## 0.78
## Curricular.units.1st.sem..approved.
## 0.81
## Curricular.units.1st.sem..grade.
## 0.78
## Curricular.units.1st.sem..without.evaluations.
## 0.56
## Curricular.units.2nd.sem..credited.
## 0.76
## Curricular.units.2nd.sem..enrolled.
## 0.73
## Curricular.units.2nd.sem..evaluations.
## 0.80
## Curricular.units.2nd.sem..approved.
## 0.77
## Curricular.units.2nd.sem..grade.
## 0.78
## Curricular.units.2nd.sem..without.evaluations.
## 0.55
## Unemployment.rate
## 0.44
## Inflation.rate
## 0.40
## GDP
## 0.50
bartlett.test(dataset)
##
## Bartlett test of homogeneity of variances
##
## data: dataset
## Bartlett's K-squared = 97460, df = 17, p-value < 2.2e-16
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
colnames(data_scaled) <- paste0("X", 1:ncol(data_scaled)) # Mengembalikan nama kolom setelah scaling
colnames(data_scaled)
## [1] "X1" "X2" "X3" "X4" "X5" "X6" "X7" "X8" "X9" "X10" "X11" "X12"
## [13] "X13" "X14" "X15" "X16" "X17" "X18"
pca_result <- prcomp(data_scaled, center = TRUE, scale. = TRUE)
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4844 1.5011 1.27133 1.23484 1.14573 1.00481 0.93463
## Proportion of Variance 0.3429 0.1252 0.08979 0.08471 0.07293 0.05609 0.04853
## Cumulative Proportion 0.3429 0.4681 0.55788 0.64260 0.71553 0.77162 0.82015
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.84767 0.79391 0.66307 0.64104 0.60613 0.46180 0.41598
## Proportion of Variance 0.03992 0.03502 0.02443 0.02283 0.02041 0.01185 0.00961
## Cumulative Proportion 0.86006 0.89508 0.91951 0.94234 0.96275 0.97459 0.98421
## PC15 PC16 PC17 PC18
## Standard deviation 0.36779 0.30361 0.18908 0.14515
## Proportion of Variance 0.00751 0.00512 0.00199 0.00117
## Cumulative Proportion 0.99172 0.99684 0.99883 1.00000
screeplot(pca_result, type = "lines", main = "Scree Plot")
fviz_eig(pca_result, addlabels = TRUE, ylim = c(0, 100))
biplot(pca_result, scale = 0)
fviz_pca_biplot(pca_result, geom.ind = "point", addEllipses = TRUE)
contrib_circle <- fviz_pca_var(pca_result, col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE) +
ggtitle("Kontribusi Variabel")
plot(contrib_circle)
varcov <- cov(data_scaled)
pc <- eigen(varcov)
eigenvalues <- pc$values
num_factors <- sum(eigenvalues > 1)
cat("Jumlah faktor berdasarkan Kaiser's Criterion:", num_factors, "\n")
## Jumlah faktor berdasarkan Kaiser's Criterion: 6
# Faktor Loadings
L = matrix(nrow = ncol(data_scaled), ncol = num_factors)
for (i in 1:num_factors) {
L[, i] = sqrt(eigenvalues[i]) * pc$vectors[, i]
}
print(L) # Menampilkan factor loadings
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.001628836 0.26229234 0.798640479 0.171628682 -0.1632769920
## [2,] -0.023069755 0.23755105 0.793680847 0.200479284 -0.2161124670
## [3,] -0.065322040 -0.49346926 -0.045800767 0.236865517 0.0530907912
## [4,] -0.749600431 -0.41168120 0.061550385 0.363608224 -0.1187676764
## [5,] -0.906014995 -0.21959923 -0.021002725 0.136166891 -0.0204638848
## [6,] -0.755046373 -0.22686877 -0.036110313 -0.201738916 0.0911360237
## [7,] -0.905831794 0.24430283 0.009418594 0.061363972 0.0004624634
## [8,] -0.628881806 0.59501496 -0.041712425 -0.285720966 0.0658525927
## [9,] -0.117779391 -0.49222201 0.294106268 -0.624162506 -0.1741372762
## [10,] -0.748464329 -0.41058949 0.059414715 0.354942339 -0.1241307050
## [11,] -0.877328960 -0.13792956 -0.037526957 0.078569481 0.0014532339
## [12,] -0.736062700 -0.06487591 -0.076158420 -0.266279843 0.0543276024
## [13,] -0.844714039 0.34195300 0.009668512 0.002201885 0.0065824409
## [14,] -0.626596024 0.61903247 -0.045244787 -0.267092167 0.0380182617
## [15,] -0.059019734 -0.46247938 0.257760459 -0.645841441 -0.1823201241
## [16,] -0.055315292 -0.03116231 0.248228480 0.023016446 0.7543721260
## [17,] 0.003345442 -0.05792676 0.012266149 0.175632145 0.2126035457
## [18,] 0.014946060 0.22211895 -0.333896557 0.098801890 -0.7153066614
## [,6]
## [1,] -0.0490288129
## [2,] 0.0324265927
## [3,] 0.0931400705
## [4,] 0.0368110632
## [5,] -0.0190637292
## [6,] -0.0046171452
## [7,] -0.0002925451
## [8,] -0.0375226606
## [9,] -0.0581561407
## [10,] 0.0443705449
## [11,] -0.0039801265
## [12,] -0.0136969929
## [13,] 0.0035796081
## [14,] -0.0345839958
## [15,] -0.0641315779
## [16,] 0.3226885805
## [17,] -0.9342066829
## [18,] 0.0812951487
fa_result <- fa(r = data_scaled, covar = TRUE, nfactors = num_factors, rotate = "promax")
## Loading required namespace: GPArotation
print(fa_result$loadings)
##
## Loadings:
## MR1 MR2 MR6 MR3 MR4 MR5
## X1 0.630
## X2 0.928
## X3 0.188 -0.313 0.120
## X4 1.000 -0.150
## X5 0.776 0.156 0.160
## X6 0.161 0.757
## X7 0.631 0.634
## X8 -0.110 0.812 0.286
## X9 1.018
## X10 1.011 -0.138
## X11 0.668 0.239 0.154
## X12 0.179 0.797
## X13 0.559 0.748 -0.172
## X14 0.876 0.144
## X15 0.570
## X16 -0.350
## X17
## X18 0.119 -0.265 1.040
##
## MR1 MR2 MR6 MR3 MR4 MR5
## SS loadings 3.890 2.665 1.431 1.448 1.274 1.233
## Proportion Var 0.216 0.148 0.080 0.080 0.071 0.068
## Cumulative Var 0.216 0.364 0.444 0.524 0.595 0.663
fa.diagram(fa_result)