library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(corrplot)
## corrplot 0.95 loaded
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(knitr)
# Load the semicolon-delimited student performance data set.
data <- read.csv("Student_performance_data _.csv", sep = ";")

# View() opens a spreadsheet-style viewer, which is only meaningful in an
# interactive session; skip it when the script is run non-interactively
# (Rscript, knitting) so the run does not depend on a display being available.
if (interactive()) {
  View(data)
}

# Inspect the column types.
# NOTE(review): in the str() output recorded in this file, StudyTimeWeekly and
# GPA come back as character (values such as "2.929.195.591.667.680"), so they
# are not numeric here. It looks like the decimal values were mangled before
# or during export -- confirm against the original CSV.
str(data)
## 'data.frame': 2392 obs. of 15 variables:
## $ StudentID : int 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 ...
## $ Age : int 17 18 15 17 17 18 15 15 17 16 ...
## $ Gender : int 1 0 0 1 1 0 0 1 0 1 ...
## $ Ethnicity : int 0 0 2 0 0 0 1 1 0 0 ...
## $ ParentalEducation: int 2 1 3 3 2 1 1 4 0 1 ...
## $ StudyTimeWeekly : chr "19.833.722.807.854.700" "1.540.875.605.584.670" "421.056.976.881.226" "10.028.829.473.958.200" ...
## $ Absences : int 7 0 26 14 17 0 10 22 1 0 ...
## $ Tutoring : int 1 0 0 0 1 0 0 1 0 0 ...
## $ ParentalSupport : int 2 1 2 3 3 1 3 1 2 3 ...
## $ Extracurricular : int 0 0 0 1 0 1 0 1 0 1 ...
## $ Sports : int 0 0 0 0 0 0 1 0 1 0 ...
## $ Music : int 1 0 0 0 0 0 0 0 0 0 ...
## $ Volunteering : int 0 0 0 0 0 0 0 0 1 0 ...
## $ GPA : chr "2.929.195.591.667.680" "3.042.914.833.436.370" "0.1126022544661815" "20.542.181.397.029.400" ...
## $ GradeClass : num 2 1 4 3 4 1 2 4 2 0 ...
# Per-column summary of the raw data frame.
summary(data)
## StudentID Age Gender Ethnicity
## Min. :1001 Min. :15.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:1599 1st Qu.:15.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median :2196 Median :16.00 Median :1.0000 Median :0.0000
## Mean :2196 Mean :16.47 Mean :0.5109 Mean :0.8775
## 3rd Qu.:2794 3rd Qu.:17.00 3rd Qu.:1.0000 3rd Qu.:2.0000
## Max. :3392 Max. :18.00 Max. :1.0000 Max. :3.0000
## ParentalEducation StudyTimeWeekly Absences Tutoring
## Min. :0.000 Length:2392 Min. : 0.00 Min. :0.0000
## 1st Qu.:1.000 Class :character 1st Qu.: 7.00 1st Qu.:0.0000
## Median :2.000 Mode :character Median :15.00 Median :0.0000
## Mean :1.746 Mean :14.54 Mean :0.3014
## 3rd Qu.:2.000 3rd Qu.:22.00 3rd Qu.:1.0000
## Max. :4.000 Max. :29.00 Max. :1.0000
## ParentalSupport Extracurricular Sports Music
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :2.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :2.122 Mean :0.3834 Mean :0.3035 Mean :0.1969
## 3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :4.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Volunteering GPA GradeClass
## Min. :0.0000 Length:2392 Min. :0.000
## 1st Qu.:0.0000 Class :character 1st Qu.:2.000
## Median :0.0000 Mode :character Median :4.000
## Mean :0.1572 Mean :2.984
## 3rd Qu.:0.0000 3rd Qu.:4.000
## Max. :1.0000 Max. :4.000
# List the column names of the raw data frame.
names(data)
## [1] "StudentID" "Age" "Gender"
## [4] "Ethnicity" "ParentalEducation" "StudyTimeWeekly"
## [7] "Absences" "Tutoring" "ParentalSupport"
## [10] "Extracurricular" "Sports" "Music"
## [13] "Volunteering" "GPA" "GradeClass"
# Build the analysis subset: numeric columns only, without StudentID.
# vapply() is used instead of sapply() so the result is always a logical
# vector, whatever the shape of `data`.
is_num <- vapply(data, is.numeric, logical(1))

# Columns that are not numeric are left out of every step below. Report them
# so the omission is visible instead of silent.
dropped_cols <- names(data)[!is_num]
if (length(dropped_cols) > 0) {
  warning(
    "Non-numeric columns excluded from the analysis: ",
    paste(dropped_cols, collapse = ", "),
    call. = FALSE
  )
}

data_numeric <- data[is_num]

# Remove StudentID by name (presumably a row identifier rather than a
# measurement -- confirm). drop = FALSE keeps the result a data frame even if
# only one column remains.
data_numeric <- data_numeric[
  ,
  !names(data_numeric) %in% "StudentID",
  drop = FALSE
]
str(data_numeric)
## 'data.frame': 2392 obs. of 12 variables:
## $ Age : int 17 18 15 17 17 18 15 15 17 16 ...
## $ Gender : int 1 0 0 1 1 0 0 1 0 1 ...
## $ Ethnicity : int 0 0 2 0 0 0 1 1 0 0 ...
## $ ParentalEducation: int 2 1 3 3 2 1 1 4 0 1 ...
## $ Absences : int 7 0 26 14 17 0 10 22 1 0 ...
## $ Tutoring : int 1 0 0 0 1 0 0 1 0 0 ...
## $ ParentalSupport : int 2 1 2 3 3 1 3 1 2 3 ...
## $ Extracurricular : int 0 0 0 1 0 1 0 1 0 1 ...
## $ Sports : int 0 0 0 0 0 0 1 0 1 0 ...
## $ Music : int 1 0 0 0 0 0 0 0 0 0 ...
## $ Volunteering : int 0 0 0 0 0 0 0 0 1 0 ...
## $ GradeClass : num 2 1 4 3 4 1 2 4 2 0 ...
# Per-column summary of the numeric analysis subset.
summary(data_numeric)
## Age Gender Ethnicity ParentalEducation
## Min. :15.00 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:15.00 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.000
## Median :16.00 Median :1.0000 Median :0.0000 Median :2.000
## Mean :16.47 Mean :0.5109 Mean :0.8775 Mean :1.746
## 3rd Qu.:17.00 3rd Qu.:1.0000 3rd Qu.:2.0000 3rd Qu.:2.000
## Max. :18.00 Max. :1.0000 Max. :3.0000 Max. :4.000
## Absences Tutoring ParentalSupport Extracurricular
## Min. : 0.00 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.: 7.00 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:0.0000
## Median :15.00 Median :0.0000 Median :2.000 Median :0.0000
## Mean :14.54 Mean :0.3014 Mean :2.122 Mean :0.3834
## 3rd Qu.:22.00 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :29.00 Max. :1.0000 Max. :4.000 Max. :1.0000
## Sports Music Volunteering GradeClass
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:2.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :4.000
## Mean :0.3035 Mean :0.1969 Mean :0.1572 Mean :2.984
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:4.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :4.000
# Descriptive statistics per variable (n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis, se), as printed below.
describe(data_numeric)
## vars n mean sd median trimmed mad min max range skew
## Age 1 2392 16.47 1.12 16 16.46 1.48 15 18 3 0.04
## Gender 2 2392 0.51 0.50 1 0.51 0.00 0 1 1 -0.04
## Ethnicity 3 2392 0.88 1.03 0 0.73 0.00 0 3 3 0.76
## ParentalEducation 4 2392 1.75 1.00 2 1.75 1.48 0 4 4 0.22
## Absences 5 2392 14.54 8.47 15 14.57 10.38 0 29 29 -0.03
## Tutoring 6 2392 0.30 0.46 0 0.25 0.00 0 1 1 0.86
## ParentalSupport 7 2392 2.12 1.12 2 2.14 1.48 0 4 4 -0.17
## Extracurricular 8 2392 0.38 0.49 0 0.35 0.00 0 1 1 0.48
## Sports 9 2392 0.30 0.46 0 0.25 0.00 0 1 1 0.85
## Music 10 2392 0.20 0.40 0 0.12 0.00 0 1 1 1.52
## Volunteering 11 2392 0.16 0.36 0 0.07 0.00 0 1 1 1.88
## GradeClass 12 2392 2.98 1.23 4 3.16 0.00 0 4 4 -0.90
## kurtosis se
## Age -1.37 0.02
## Gender -2.00 0.01
## Ethnicity -0.77 0.02
## ParentalEducation -0.29 0.02
## Absences -1.18 0.17
## Tutoring -1.25 0.01
## ParentalSupport -0.73 0.02
## Extracurricular -1.77 0.01
## Sports -1.27 0.01
## Music 0.32 0.01
## Volunteering 1.54 0.01
## GradeClass -0.42 0.03
# Same descriptive statistics, rendered as a table with kable().
kable(describe(data_numeric))
## |                  | vars|    n|       mean|        sd| median|    trimmed|     mad| min| max| range|       skew|   kurtosis|        se|
## |:-----------------|----:|----:|----------:|---------:|------:|----------:|-------:|---:|---:|-----:|----------:|----------:|---------:|
## |Age               |    1| 2392| 16.4686455| 1.1237984|     16| 16.4608150|  1.4826|  15|  18|     3|  0.0357485| -1.3731015| 0.0229778|
## |Gender            |    2| 2392|  0.5108696| 0.4999864|      1|  0.5135841|  0.0000|   0|   1|     1| -0.0434613| -1.9989463| 0.0102230|
## |Ethnicity         |    3| 2392|  0.8775084| 1.0284758|      0|  0.7309300|  0.0000|   0|   3|     3|  0.7581156| -0.7703030| 0.0210288|
## |ParentalEducation |    4| 2392|  1.7462375| 1.0004111|      2|  1.7450366|  1.4826|   0|   4|     4|  0.2153796| -0.2903732| 0.0204549|
## |Absences          |    5| 2392| 14.5413880| 8.4674174|     15| 14.5731452| 10.3782|   0|  29|    29| -0.0259845| -1.1778001| 0.1731292|
## |Tutoring          |    6| 2392|  0.3014214| 0.4589712|      0|  0.2518286|  0.0000|   0|   1|     1|  0.8649587| -1.2523696| 0.0093844|
## |ParentalSupport   |    7| 2392|  2.1220736| 1.1228129|      2|  2.1384535|  1.4826|   0|   4|     4| -0.1666495| -0.7254322| 0.0229576|
## |Extracurricular   |    8| 2392|  0.3833612| 0.4863068|      0|  0.3542320|  0.0000|   0|   1|     1|  0.4794917| -1.7708275| 0.0099433|
## |Sports            |    9| 2392|  0.3035117| 0.4598704|      0|  0.2544410|  0.0000|   0|   1|     1|  0.8541805| -1.2709065| 0.0094027|
## |Music             |   10| 2392|  0.1969064| 0.3977441|      0|  0.1212121|  0.0000|   0|   1|     1|  1.5234264|  0.3209626| 0.0081325|
## |Volunteering      |   11| 2392|  0.1571906| 0.3640565|      0|  0.0715778|  0.0000|   0|   1|     1|  1.8824882|  1.5444080| 0.0074437|
## |GradeClass        |   12| 2392|  2.9836957| 1.2339076|      4|  3.1603971|  0.0000|   0|   4|     4| -0.8996778| -0.4204059| 0.0252291|
# Boxplots of every numeric variable on its original scale.
# las = 2 draws the axis labels perpendicular to the axis so the variable
# names do not overlap.
boxplot(
  data_numeric,
  col = "lightblue",
  main = "Boxplot Variabel",
  las = 2
)

# Standardise the variables with scale() (default centring and scaling), then
# redraw the boxplots so all variables share a comparable scale.
data_scaled <- scale(data_numeric)
boxplot(
  data_scaled,
  col = "lightgreen",
  main = "Boxplot Data Setelah Standardisasi",
  las = 2
)

# Correlation matrix of the numeric variables (cor() with its default
# settings), printed in full below.
cor_matrix <- cor(data_numeric)
cor_matrix
## Age Gender Ethnicity ParentalEducation
## Age 1.000000000 0.0448952880 -0.028473460 0.025098637
## Gender 0.044895288 1.0000000000 0.016010276 0.006770991
## Ethnicity -0.028473460 0.0160102761 1.000000000 0.033595258
## ParentalEducation 0.025098637 0.0067709908 0.033595258 1.000000000
## Absences -0.011510913 0.0214792249 -0.025711779 0.036517503
## Tutoring -0.012076476 -0.0315972760 -0.017439893 -0.017340423
## ParentalSupport 0.033197053 0.0080654091 0.020921990 -0.017463038
## Extracurricular -0.025061399 -0.0059642360 -0.008926696 0.007479342
## Sports -0.046320217 -0.0088971624 -0.004484258 0.002028728
## Music -0.003491987 0.0071093692 -0.014626961 0.039439007
## Volunteering 0.013074043 -0.0001998001 0.013467644 0.011959707
## GradeClass -0.006250264 0.0229977624 -0.023325822 0.041031288
## Absences Tutoring ParentalSupport Extracurricular
## Age -0.011510913 -0.0120764764 0.0331970528 -0.025061399
## Gender 0.021479225 -0.0315972760 0.0080654091 -0.005964236
## Ethnicity -0.025711779 -0.0174398934 0.0209219902 -0.008926696
## ParentalEducation 0.036517503 -0.0173404233 -0.0174630376 0.007479342
## Absences 1.000000000 -0.0155336258 0.0021078077 0.000360288
## Tutoring -0.015533626 1.0000000000 -0.0008237873 0.004865472
## ParentalSupport 0.002107808 -0.0008237873 1.0000000000 -0.008380665
## Extracurricular 0.000360288 0.0048654722 -0.0083806650 1.000000000
## Sports 0.041454120 0.0062775898 -0.0061764833 -0.011819738
## Music -0.008692102 -0.0113852421 0.0351220039 -0.014191136
## Volunteering -0.018528067 -0.0508977428 -0.0060363113 -0.007426736
## GradeClass 0.728632710 -0.1116945788 -0.1368225942 -0.069733244
## Sports Music Volunteering GradeClass
## Age -0.046320217 -0.003491987 0.0130740429 -0.006250264
## Gender -0.008897162 0.007109369 -0.0001998001 0.022997762
## Ethnicity -0.004484258 -0.014626961 0.0134676440 -0.023325822
## ParentalEducation 0.002028728 0.039439007 0.0119597071 0.041031288
## Absences 0.041454120 -0.008692102 -0.0185280669 0.728632710
## Tutoring 0.006277590 -0.011385242 -0.0508977428 -0.111694579
## ParentalSupport -0.006176483 0.035122004 -0.0060363113 -0.136822594
## Extracurricular -0.011819738 -0.014191136 -0.0074267358 -0.069733244
## Sports 1.000000000 -0.020473824 -0.0027989156 -0.026654304
## Music -0.020473824 1.000000000 0.0172237617 -0.036065038
## Volunteering -0.002798916 0.017223762 1.0000000000 0.013156020
## GradeClass -0.026654304 -0.036065038 0.0131560196 1.000000000
# Colour-coded plot of the correlation matrix: upper triangle only, with
# black variable labels rotated by 45 degrees.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  tl.col = "black",
  tl.srt = 45
)

# Covariance matrix of the numeric variables on their original scales,
# printed in full below.
cov_matrix <- cov(data_numeric)
cov_matrix
## Age Gender Ethnicity ParentalEducation
## Age 1.262922799 2.522594e-02 -0.032909608 0.0282174025
## Gender 0.025225938 2.499864e-01 0.008232866 0.0033867947
## Ethnicity -0.032909608 8.232866e-03 1.057762422 0.0345661126
## ParentalEducation 0.028217402 3.386795e-03 0.034566113 1.0008223075
## Absences -0.109534046 9.093430e-02 -0.223911889 0.3093360484
## Tutoring -0.006228940 -7.250923e-03 -0.008232341 -0.0079620273
## ParentalSupport 0.041888548 4.527849e-03 0.024160418 -0.0196157833
## Extracurricular -0.013696324 -1.450185e-03 -0.004464729 0.0036387498
## Sports -0.023938361 -2.045715e-03 -0.002120899 0.0009333356
## Music -0.001560863 1.413816e-03 -0.005983454 0.0156930812
## Volunteering 0.005348933 -3.636827e-05 0.005042600 0.0043557991
## GradeClass -0.008667012 1.418817e-02 -0.029601495 0.0506496281
## Absences Tutoring ParentalSupport Extracurricular
## Age -0.10953405 -0.0062289396 0.0418885481 -0.013696324
## Gender 0.09093430 -0.0072509228 0.0045278490 -0.001450185
## Ethnicity -0.22391189 -0.0082323415 0.0241604176 -0.004464729
## ParentalEducation 0.30933605 -0.0079620273 -0.0196157833 0.003638750
## Absences 71.69715709 -0.0603683476 0.0200396134 0.001483580
## Tutoring -0.06036835 0.2106546078 -0.0004245296 0.001085977
## ParentalSupport 0.02003961 -0.0004245296 1.2607087056 -0.004576107
## Extracurricular 0.00148358 0.0010859774 -0.0045761069 0.236494260
## Sports 0.16141880 0.0013249938 -0.0031892171 -0.002643343
## Music -0.02927383 -0.0020784114 0.0156852131 -0.002744930
## Volunteering -0.05711496 -0.0085045789 -0.0024674469 -0.001314853
## GradeClass 7.61276208 -0.0632557780 -0.1895604895 -0.041843962
## Sports Music Volunteering GradeClass
## Age -0.0239383614 -0.001560863 5.348933e-03 -0.008667012
## Gender -0.0020457149 0.001413816 -3.636827e-05 0.014188169
## Ethnicity -0.0021208993 -0.005983454 5.042600e-03 -0.029601495
## ParentalEducation 0.0009333356 0.015693081 4.355799e-03 0.050649628
## Absences 0.1614187960 -0.029273831 -5.711496e-02 7.612762079
## Tutoring 0.0013249938 -0.002078411 -8.504579e-03 -0.063255778
## ParentalSupport -0.0031892171 0.015685213 -2.467447e-03 -0.189560490
## Extracurricular -0.0026433434 -0.002744930 -1.314853e-03 -0.041843962
## Sports 0.2114807619 -0.003744882 -4.685911e-04 -0.015124652
## Music -0.0037448822 0.158200379 2.494024e-03 -0.017699980
## Volunteering -0.0004685911 0.002494024 1.325371e-01 0.005909843
## GradeClass -0.0151246522 -0.017699980 5.909843e-03 1.522527867
# Bartlett's test on the correlation matrix; because a correlation matrix is
# passed rather than raw data, the sample size is supplied through n.
cortest.bartlett(
  cor_matrix,
  n = nrow(data_numeric)
)
## $chisq
## [1] 2071.714
##
## $p.value
## [1] 0
##
## $df
## [1] 66
# Kaiser-Meyer-Olkin measure of sampling adequacy, overall and per variable.
# NOTE(review): the output recorded below reports an overall MSA of 0.46, with
# several items under 0.3. That is below the commonly quoted 0.5 threshold, so
# confirm that factor analysis on these variables is justified.
KMO(cor_matrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor_matrix)
## Overall MSA = 0.46
## MSA for each item =
## Age Gender Ethnicity ParentalEducation
## 0.50 0.57 0.52 0.59
## Absences Tutoring ParentalSupport Extracurricular
## 0.47 0.32 0.24 0.23
## Sports Music Volunteering GradeClass
## 0.21 0.44 0.44 0.47
# Principal component analysis on the standardised variables.
# prcomp()'s argument is spelled `scale.` (with a trailing dot); the previous
# `scale = TRUE` only worked through partial argument matching.
pca_result <- prcomp(data_numeric, scale. = TRUE)

# Standard deviation, proportion and cumulative proportion of variance for
# each component.
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.328 1.05790 1.02743 1.01311 1.00980 1.00658 0.99608
## Proportion of Variance 0.147 0.09326 0.08797 0.08553 0.08498 0.08443 0.08268
## Cumulative Proportion 0.147 0.24022 0.32819 0.41372 0.49870 0.58313 0.66581
## PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.99245 0.98275 0.96781 0.93710 0.49468
## Proportion of Variance 0.08208 0.08048 0.07805 0.07318 0.02039
## Cumulative Proportion 0.74789 0.82837 0.90643 0.97961 1.00000
# Eigenvalues of the components: the squared component standard deviations
# returned by prcomp().
eigen_values <- pca_result$sdev^2
eigen_values
## [1] 1.7634820 1.1191451 1.0556160 1.0264012 1.0197040 1.0132018 0.9921767
## [8] 0.9849496 0.9658043 0.9366578 0.8781572 0.2447043
# Share of the total variance carried by each component (eigenvalue divided
# by the sum of all eigenvalues).
prop_var <- eigen_values / sum(eigen_values)
prop_var
## [1] 0.14695684 0.09326209 0.08796800 0.08553343 0.08497533 0.08443348
## [7] 0.08268139 0.08207914 0.08048369 0.07805482 0.07317977 0.02039202
# Scree plot: eigenvalue against component index, drawn as points joined by
# lines (type = "b").
plot(
  eigen_values,
  type = "b",
  xlab = "Komponen Utama",
  ylab = "Eigenvalue",
  main = "Scree Plot PCA"
)

# Tabulate each component with its eigenvalue and proportion of variance.
# seq_along() replaces 1:length(), which counts backwards (1, 0) when the
# vector is empty; the labels ("PC 1", "PC 2", ...) are unchanged.
eigen_table <- data.frame(
  Komponen = paste("PC", seq_along(eigen_values)),
  Eigenvalue = eigen_values,
  Proporsi_Variansi = prop_var
)
kable(eigen_table)
## |Komponen | Eigenvalue| Proporsi_Variansi|
## |:--------|----------:|-----------------:|
## |PC 1     |  1.7634820|         0.1469568|
## |PC 2     |  1.1191451|         0.0932621|
## |PC 3     |  1.0556160|         0.0879680|
## |PC 4     |  1.0264012|         0.0855334|
## |PC 5     |  1.0197040|         0.0849753|
## |PC 6     |  1.0132018|         0.0844335|
## |PC 7     |  0.9921767|         0.0826814|
## |PC 8     |  0.9849496|         0.0820791|
## |PC 9     |  0.9658043|         0.0804837|
## |PC 10    |  0.9366578|         0.0780548|
## |PC 11    |  0.8781572|         0.0731798|
## |PC 12    |  0.2447043|         0.0203920|
# Exploratory factor analysis: three factors with varimax rotation, fitted on
# the numeric variables, then printed in full.
fa_result <- fa(
  data_numeric,
  nfactors = 3,
  rotate = "varimax"
)
fa_result
## Factor Analysis using method = minres
## Call: fa(r = data_numeric, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 MR3 h2 u2 com
## Age -0.01 0.14 0.14 0.0380 0.9620 2.0
## Gender 0.02 0.11 0.09 0.0202 0.9798 2.0
## Ethnicity -0.03 0.03 0.03 0.0023 0.9977 2.9
## ParentalEducation 0.04 0.08 0.05 0.0093 0.9907 2.1
## Absences 0.99 -0.08 0.07 0.9964 0.0036 1.0
## Tutoring -0.03 -0.20 -0.01 0.0406 0.9594 1.1
## ParentalSupport -0.02 -0.04 0.25 0.0641 0.9359 1.1
## Extracurricular -0.02 -0.10 0.01 0.0096 0.9904 1.1
## Sports 0.03 -0.12 -0.03 0.0162 0.9838 1.2
## Music -0.01 0.05 0.11 0.0147 0.9853 1.4
## Volunteering -0.02 0.11 0.03 0.0124 0.9876 1.2
## GradeClass 0.80 0.44 -0.41 0.9972 0.0028 2.1
##
## MR1 MR2 MR3
## SS loadings 1.63 0.31 0.28
## Proportion Var 0.14 0.03 0.02
## Cumulative Var 0.14 0.16 0.19
## Proportion Explained 0.73 0.14 0.13
## Cumulative Proportion 0.73 0.87 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 66 with the objective function = 0.87 with Chi Square = 2071.71
## df of the model are 33 and the objective function was 0.01
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.02
##
## The harmonic n.obs is 2392 with the empirical chi square 26.84 with prob < 0.77
## The total n.obs was 2392 with Likelihood Chi Square = 33.79 with prob < 0.43
##
## Tucker Lewis Index of factoring reliability = 0.999
## RMSEA index = 0.003 and the 90 % confidence intervals are 0 0.015
## BIC = -222.94
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy
## MR1 MR2 MR3
## Correlation of (regression) scores with factors 1.00 0.77 0.73
## Multiple R square of scores with factors 1.00 0.59 0.53
## Minimum correlation of possible factor scores 0.99 0.18 0.06
# Print only the loadings of the factor solution.
fa_result$loadings
##
## Loadings:
## MR1 MR2 MR3
## Age 0.138 0.137
## Gender 0.106
## Ethnicity
## ParentalEducation
## Absences 0.992
## Tutoring -0.198
## ParentalSupport 0.249
## Extracurricular
## Sports -0.120
## Music 0.110
## Volunteering 0.106
## GradeClass 0.798 0.436 -0.412
##
## MR1 MR2 MR3
## SS loadings 1.627 0.312 0.282
## Proportion Var 0.136 0.026 0.023
## Cumulative Var 0.136 0.162 0.185
# Draw a diagram of the fitted factor solution.
fa.diagram(fa_result)
