##Import dan Pemeriksaan Awal Data
library(psych)
library(FactoMineR)
library(factoextra)
library(tidyverse)
library(sjPlot)
| Pada tahap awal dilakukan pemanggilan beberapa library yang
mendukung analisis multivariat. |
# Load the raw dataset. Text columns are kept as plain character vectors
# (not factors) so that they can be recoded explicitly later on.
data <- read.csv(
  file = "StudentPerformanceFactors.csv",
  stringsAsFactors = FALSE
)
# List the column names as a first check of what was read.
names(data)
## [1] "Hours_Studied" "Attendance"
## [3] "Parental_Involvement" "Access_to_Resources"
## [5] "Extracurricular_Activities" "Sleep_Hours"
## [7] "Previous_Scores" "Motivation_Level"
## [9] "Internet_Access" "Tutoring_Sessions"
## [11] "Family_Income" "Teacher_Quality"
## [13] "School_Type" "Peer_Influence"
## [15] "Physical_Activity" "Learning_Disabilities"
## [17] "Parental_Education_Level" "Distance_from_Home"
## [19] "Gender" "Exam_Score"
# Preview the first six rows to confirm the columns were parsed as expected.
head(data, n = 6L)
## Hours_Studied Attendance Parental_Involvement Access_to_Resources
## 1 23 84 Low High
## 2 19 64 Low Medium
## 3 24 98 Medium Medium
## 4 29 89 Low Medium
## 5 19 92 Medium Medium
## 6 19 88 Medium Medium
## Extracurricular_Activities Sleep_Hours Previous_Scores Motivation_Level
## 1 No 7 73 Low
## 2 No 8 59 Low
## 3 Yes 7 91 Medium
## 4 Yes 8 98 Medium
## 5 Yes 6 65 Medium
## 6 Yes 8 89 Medium
## Internet_Access Tutoring_Sessions Family_Income Teacher_Quality School_Type
## 1 Yes 0 Low Medium Public
## 2 Yes 2 Medium Medium Public
## 3 Yes 2 Medium Medium Public
## 4 Yes 1 Medium Medium Public
## 5 Yes 3 Medium High Public
## 6 Yes 3 Medium Medium Public
## Peer_Influence Physical_Activity Learning_Disabilities
## 1 Positive 3 No
## 2 Negative 4 No
## 3 Neutral 4 No
## 4 Negative 4 No
## 5 Neutral 4 No
## 6 Positive 3 No
## Parental_Education_Level Distance_from_Home Gender Exam_Score
## 1 High School Near Male 67
## 2 College Moderate Female 61
## 3 Postgraduate Near Male 74
## 4 High School Moderate Male 71
## 5 College Near Female 70
## 6 Postgraduate Near Male 71
| Tahap ini bertujuan untuk mengimpor dataset ke dalam R menggunakan
read.csv(). Setelah data terbaca, dilakukan pemeriksaan awal melalui
colnames() untuk melihat nama variabel serta head() untuk memastikan
struktur dan isi data sudah sesuai sebelum dilakukan pengolahan lebih
lanjut. |
##Seleksi Variabel
# Columns carried into the PCA / factor analysis, in their original order.
# NOTE(review): Exam_Score looks like the outcome the other columns are meant
# to explain; keeping it in the same set may be related to the ultra-Heywood
# warnings that fa() reports later in this file -- confirm it belongs here.
analysis_cols <- c(
  "Hours_Studied",
  "Attendance",
  "Access_to_Resources",
  "Sleep_Hours",
  "Previous_Scores",
  "Motivation_Level",
  "Tutoring_Sessions",
  "Family_Income",
  "Teacher_Quality",
  "Peer_Influence",
  "Physical_Activity",
  "Distance_from_Home",
  "Exam_Score"
)
vars <- dplyr::select(data, dplyr::all_of(analysis_cols))
| Tahap ini memilih variabel-variabel yang relevan untuk dianalisis
dalam PCA dan FA. Pemilihan dilakukan agar analisis terfokus pada
variabel numerik serta variabel kategorik ordinal (yang selanjutnya
dikodekan menjadi angka) yang berkaitan dengan performa siswa. |
##Transformasi dan Pembersihan Data
# Convert ordered category labels to the scores 1, 2, 3, ...
#   x      : character vector of labels; surrounding whitespace is ignored.
#   labels : the valid labels in ascending order; the i-th label is scored i.
# Returns a double vector of the same length as `x`. Missing, blank or
# unrecognised labels become NA so that they are dropped by na.omit() below.
encode_ordinal <- function(x, labels) {
  as.numeric(match(str_trim(x), labels))
}

# Shared Low < Medium < High scale used by four of the categorical columns.
low_to_high <- c("Low", "Medium", "High")

vars_num <- vars %>%
  mutate(
    across(
      c(Access_to_Resources, Motivation_Level, Family_Income, Teacher_Quality),
      function(x) encode_ordinal(x, low_to_high)
    ),
    Peer_Influence = encode_ordinal(
      Peer_Influence, c("Negative", "Neutral", "Positive")
    ),
    Distance_from_Home = encode_ordinal(
      Distance_from_Home, c("Near", "Moderate", "Far")
    )
  )

# Keep complete rows only; rows with any NA (including labels that could not
# be scored above) are removed.
vars_clean <- na.omit(vars_num)
| Variabel kategorik diubah menjadi bentuk numerik melalui proses
recode agar dapat dianalisis secara kuantitatif. Selanjutnya, data yang
memiliki nilai kosong dihapus menggunakan na.omit() untuk memastikan
kualitas data dan menghindari gangguan dalam proses analisis. |
# Centre each column and divide it by its standard deviation so that all
# variables enter the multivariate analyses on a common scale.
vars_scaled <- scale(vars_clean, center = TRUE, scale = TRUE)
# Sanity check that the scaled matrix now exists in the workspace.
exists("vars_scaled")
## [1] TRUE
##Statistika Deskriptif
# Summary functions applied to every column; each list name becomes the suffix
# of the resulting column (e.g. Hours_Studied_mean).
summary_funs <- list(mean = mean, sd = sd, min = min, max = max)
# One wide row holding <variable>_<statistic> columns ...
stats_wide <- summarise(vars_clean, across(everything(), summary_funs))
# ... transposed so that each statistic is a row of a one-column data frame.
desc_stats <- as.data.frame(t(stats_wide))
desc_table <- round(desc_stats, 2)
knitr::kable(
  desc_table,
  caption = "Tabel 1. Statistika Deskriptif Variabel Penelitian"
)
Tabel 1. Statistika Deskriptif Variabel Penelitian
| Hours_Studied_mean |
19.99 |
| Hours_Studied_sd |
5.99 |
| Hours_Studied_min |
1.00 |
| Hours_Studied_max |
44.00 |
| Attendance_mean |
80.02 |
| Attendance_sd |
11.55 |
| Attendance_min |
60.00 |
| Attendance_max |
100.00 |
| Access_to_Resources_mean |
2.10 |
| Access_to_Resources_sd |
0.70 |
| Access_to_Resources_min |
1.00 |
| Access_to_Resources_max |
3.00 |
| Sleep_Hours_mean |
7.03 |
| Sleep_Hours_sd |
1.47 |
| Sleep_Hours_min |
4.00 |
| Sleep_Hours_max |
10.00 |
| Previous_Scores_mean |
75.05 |
| Previous_Scores_sd |
14.40 |
| Previous_Scores_min |
50.00 |
| Previous_Scores_max |
100.00 |
| Motivation_Level_mean |
1.91 |
| Motivation_Level_sd |
0.70 |
| Motivation_Level_min |
1.00 |
| Motivation_Level_max |
3.00 |
| Tutoring_Sessions_mean |
1.49 |
| Tutoring_Sessions_sd |
1.23 |
| Tutoring_Sessions_min |
0.00 |
| Tutoring_Sessions_max |
8.00 |
| Family_Income_mean |
1.79 |
| Family_Income_sd |
0.74 |
| Family_Income_min |
1.00 |
| Family_Income_max |
3.00 |
| Teacher_Quality_mean |
2.20 |
| Teacher_Quality_sd |
0.60 |
| Teacher_Quality_min |
1.00 |
| Teacher_Quality_max |
3.00 |
| Peer_Influence_mean |
2.19 |
| Peer_Influence_sd |
0.76 |
| Peer_Influence_min |
1.00 |
| Peer_Influence_max |
3.00 |
| Physical_Activity_mean |
2.97 |
| Physical_Activity_sd |
1.03 |
| Physical_Activity_min |
0.00 |
| Physical_Activity_max |
6.00 |
| Distance_from_Home_mean |
1.50 |
| Distance_from_Home_sd |
0.67 |
| Distance_from_Home_min |
1.00 |
| Distance_from_Home_max |
3.00 |
| Exam_Score_mean |
67.25 |
| Exam_Score_sd |
3.91 |
| Exam_Score_min |
55.00 |
| Exam_Score_max |
101.00 |
| Pada tahap eksplorasi, dihitung statistika deskriptif berupa
rata-rata, standar deviasi, nilai minimum, dan maksimum untuk setiap
variabel. Tujuannya adalah memberikan gambaran umum mengenai
karakteristik dan variasi data sebelum dilakukan analisis
multivariat. |
##Visualisasi Distribusi Data
library(tidyverse)
# Reshape to one row per (variable, value) pair so that a single ggplot call
# can draw one panel per variable.
vars_long <- tidyr::pivot_longer(
  vars_clean,
  cols = everything(),
  names_to = "Variable",
  values_to = "Value"
)
# One histogram per variable; scales are free because the value ranges of the
# variables differ.
dist_plot <- ggplot(data = vars_long, mapping = aes(x = Value)) +
  geom_histogram(bins = 30, fill = "steelblue", color = "black") +
  facet_wrap(~ Variable, scales = "free") +
  theme_minimal() +
  labs(x = "Nilai", y = "Frekuensi")
dist_plot

| Data diubah ke format long agar dapat divisualisasikan dalam bentuk
histogram untuk setiap variabel. Visualisasi ini bertujuan untuk melihat
pola distribusi, sebaran, serta kemungkinan adanya pencilan
(outlier). |
##Standardisasi Data
| Data distandardisasi menggunakan fungsi scale() untuk menyamakan
skala antarvariabel (hasilnya disimpan pada objek vars_scaled yang
dibentuk setelah pembersihan data). Langkah ini penting dalam PCA dan FA
agar variabel dengan skala besar tidak mendominasi hasil analisis. |
##Uji Asumsi
# Pearson correlation matrix of the standardised variables.
mat_corr <- cor(x = vars_scaled, method = "pearson")
library(corrplot)
## corrplot 0.95 loaded
# Colour-coded heatmap of the upper triangle of the correlation matrix.
corrplot::corrplot(
  corr = mat_corr,
  method = "color",
  type = "upper",
  tl.cex = 0.8
)

| Pada tahap uji keterkaitan antarvariabel, dihitung matriks korelasi
dan divisualisasikan menggunakan heatmap. Tujuannya adalah melihat pola
hubungan antarvariabel sebagai dasar kelayakan analisis faktor. |
# Bartlett's test on the correlation matrix, using the number of rows of the
# scaled data as the sample size.
n_obs <- nrow(vars_scaled)
cortest.bartlett(R = mat_corr, n = n_obs)
## $chisq
## [1] 7221.379
##
## $p.value
## [1] 0
##
## $df
## [1] 78
| Dilakukan uji Bartlett untuk menguji apakah matriks korelasi berbeda
secara signifikan dari matriks identitas. Jika signifikan, maka terdapat
korelasi yang memadai untuk melanjutkan ke analisis faktor. |
# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per variable),
# computed from the correlation matrix.
KMO(r = mat_corr)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = mat_corr)
## Overall MSA = 0.26
## MSA for each item =
## Hours_Studied Attendance Access_to_Resources Sleep_Hours
## 0.22 0.27 0.13 0.57
## Previous_Scores Motivation_Level Tutoring_Sessions Family_Income
## 0.15 0.13 0.14 0.13
## Teacher_Quality Peer_Influence Physical_Activity Distance_from_Home
## 0.11 0.13 0.14 0.19
## Exam_Score
## 0.34
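| Nilai KMO dihitung untuk mengukur kecukupan sampel dan kekuatan
korelasi parsial antarvariabel. Hasil ini digunakan untuk menentukan
apakah data layak dianalisis menggunakan Factor Analysis. Pada data ini
diperoleh Overall MSA sebesar 0,26, di bawah batas minimum yang umum
digunakan (0,50), sehingga data kurang layak untuk Factor Analysis dan
hasil analisis faktor perlu ditafsirkan dengan hati-hati. |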
##Analisis Principal Component Analysis (PCA)
# PCA on the matrix that was already standardised with scale(), so the
# function's own unit scaling is switched off; the built-in plots are
# suppressed because the figures are drawn separately.
pca_res <- PCA(X = vars_scaled, scale.unit = FALSE, graph = FALSE)
Tahap ini melakukan Principal Component Analysis untuk mereduksi
dimensi data dengan mengekstraksi komponen utama yang mampu menjelaskan
variasi terbesar dalam dataset.
# Eigenvalue, percentage of variance and cumulative percentage per component.
eig <- factoextra::get_eigenvalue(X = pca_res)
print(eig)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 1.8069351 13.901651 13.90165
## Dim.2 1.0626924 8.175822 22.07747
## Dim.3 1.0438262 8.030675 30.10815
## Dim.4 1.0374539 7.981650 38.08980
## Dim.5 1.0283442 7.911564 46.00136
## Dim.6 1.0059743 7.739461 53.74082
## Dim.7 1.0010694 7.701725 61.44255
## Dim.8 0.9934854 7.643378 69.08592
## Dim.9 0.9787973 7.530375 76.61630
## Dim.10 0.9706534 7.467720 84.08402
## Dim.11 0.9620919 7.401852 91.48587
## Dim.12 0.9266572 7.129236 98.61511
## Dim.13 0.1800081 1.384892 100.00000
# Scree plot of the PCA result, with the value printed on top of each bar.
factoextra::fviz_eig(X = pca_res, addlabels = TRUE)
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

| Nilai eigenvalue dihitung untuk menentukan jumlah komponen yang
dipertahankan berdasarkan kriteria tertentu. Scree plot ditampilkan
untuk memvisualisasikan penurunan nilai eigenvalue antar komponen. |
# Variable plot of the PCA; repel = TRUE keeps the labels from overlapping.
factoextra::fviz_pca_var(X = pca_res, repel = TRUE)

# Coordinates of the variables on the retained components, shown as the
# loading table rounded to three decimals.
var_coord <- pca_res[["var"]][["coord"]]
loadings_pca <- as.data.frame(var_coord)
round(loadings_pca, digits = 3)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Hours_Studied 0.521 0.367 0.368 0.263 0.263
## Attendance 0.676 -0.397 -0.002 -0.044 -0.272
## Access_to_Resources 0.189 0.429 -0.296 -0.245 -0.097
## Sleep_Hours -0.044 -0.020 0.496 0.121 -0.229
## Previous_Scores 0.210 0.548 -0.085 -0.361 -0.013
## Motivation_Level 0.099 0.055 -0.357 -0.053 -0.075
## Tutoring_Sessions 0.184 -0.421 -0.029 -0.225 0.238
## Family_Income 0.106 -0.118 -0.446 0.375 0.102
## Teacher_Quality 0.076 -0.030 0.231 0.167 -0.488
## Peer_Influence 0.110 0.002 -0.223 0.526 0.368
## Physical_Activity 0.011 -0.105 0.334 -0.310 0.613
## Distance_from_Home -0.122 0.282 0.157 0.408 0.017
## Exam_Score 0.953 0.002 0.017 0.009 0.002
# Top eight variable contributions to the first component ...
factoextra::fviz_contrib(X = pca_res, choice = "var", axes = 1, top = 8)

# ... and to the second component.
factoextra::fviz_contrib(X = pca_res, choice = "var", axes = 2, top = 8)

| Loading variabel terhadap komponen utama dihitung dan
divisualisasikan untuk mengetahui kontribusi masing-masing variabel
dalam membentuk komponen utama. |
##Analisis Factor Analysis (FA)
# Parallel analysis for the number of factors (principal-axis factoring,
# factor solution only, 20 iterations, legend hidden).
# The procedure compares the observed eigenvalues with eigenvalues obtained
# from random data, so its suggestion depends on the random number stream.
# The seed is fixed so that the suggested number of factors is reproducible
# from one run of the report to the next.
set.seed(123)
fa.parallel(
  vars_scaled,
  fa = "fa",
  fm = "pa",
  n.iter = 20,
  show.legend = FALSE
)
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## Parallel analysis suggests that the number of factors = 5 and the number of components = NA
# Factor analysis with three factors, principal-axis extraction (fm = "pa")
# and varimax rotation.
# NOTE(review): the parallel analysis output just above suggests 5 factors
# while nfactors = 3 is used here -- confirm the choice is intentional.
fa_res <- fa(r = vars_scaled, nfactors = 3, rotate = "varimax", fm = "pa")
## maximum iteration exceeded
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# Sorted loading table; loadings smaller than 0.4 in absolute value are left
# blank to make the dominant variables of each factor stand out.
print(fa_res[["loadings"]], cutoff = 0.4, sort = TRUE)
##
## Loadings:
## PA2 PA3 PA1
## Attendance 0.982
## Exam_Score 0.573 1.071 0.471
## Hours_Studied 0.855
## Access_to_Resources
## Sleep_Hours
## Previous_Scores
## Motivation_Level
## Tutoring_Sessions
## Family_Income
## Teacher_Quality
## Peer_Influence
## Physical_Activity
## Distance_from_Home
##
## PA2 PA3 PA1
## SS loadings 1.295 1.244 0.956
## Proportion Var 0.100 0.096 0.074
## Cumulative Var 0.100 0.195 0.269
# Path diagram of the factor solution, drawing loadings above 0.5 with three
# decimals; simple = FALSE allows more than one loading per variable.
fa.diagram(fa.results = fa_res, simple = FALSE, cut = 0.5, digits = 3)

# Full loading matrix without any cutoff: the "loadings" class is stripped so
# the values can be turned into a plain data frame and printed as a table.
loading_matrix <- unclass(fa_res$loadings)
fa_loadings_full <- as.data.frame(loading_matrix)
knitr::kable(round(fa_loadings_full, digits = 3))
| Hours_Studied |
-0.001 |
0.037 |
0.855 |
| Attendance |
0.982 |
0.015 |
-0.006 |
| Access_to_Resources |
-0.011 |
0.156 |
-0.004 |
| Sleep_Hours |
-0.012 |
-0.024 |
0.017 |
| Previous_Scores |
-0.016 |
0.152 |
0.028 |
| Motivation_Level |
0.000 |
0.085 |
-0.014 |
| Tutoring_Sessions |
0.017 |
0.131 |
-0.012 |
| Family_Income |
-0.007 |
0.085 |
0.002 |
| Teacher_Quality |
0.002 |
0.060 |
0.000 |
| Peer_Influence |
-0.002 |
0.078 |
0.013 |
| Physical_Activity |
-0.017 |
0.024 |
0.009 |
| Distance_from_Home |
-0.023 |
-0.077 |
0.018 |
| Exam_Score |
0.573 |
1.071 |
0.471 |
# Communality of each variable, rounded to three decimals.
# NOTE(review): the rendered table shows a communality above 1 for Exam_Score,
# which matches the ultra-Heywood warning raised by fa() -- treat with care.
communalities <- round(fa_res$communality, digits = 3)
knitr::kable(communalities)
| Hours_Studied |
0.733 |
| Attendance |
0.965 |
| Access_to_Resources |
0.025 |
| Sleep_Hours |
0.001 |
| Previous_Scores |
0.024 |
| Motivation_Level |
0.007 |
| Tutoring_Sessions |
0.018 |
| Family_Income |
0.007 |
| Teacher_Quality |
0.004 |
| Peer_Influence |
0.006 |
| Physical_Activity |
0.001 |
| Distance_from_Home |
0.007 |
| Exam_Score |
1.697 |
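| Tahap ini dimulai dengan parallel analysis untuk menentukan jumlah
faktor yang optimal. Selanjutnya dilakukan ekstraksi faktor menggunakan
metode Principal Axis Factoring dengan rotasi Varimax untuk memperoleh
struktur faktor yang lebih jelas. Ditampilkan nilai loading faktor baik
dengan cutoff (untuk interpretasi utama) maupun tanpa cutoff (untuk
informasi lengkap), serta nilai komunalitas untuk melihat seberapa besar
varians tiap variabel dapat dijelaskan oleh faktor yang terbentuk. Perlu
dicatat bahwa parallel analysis menyarankan 5 faktor sedangkan model
diestimasi dengan 3 faktor, dan muncul peringatan ultra-Heywood
(komunalitas Exam_Score sebesar 1,697 melebihi 1), sehingga solusi faktor
ini perlu ditafsirkan dengan hati-hati. |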