Dataset –> Student Performance Factors Link Data –> https://www.kaggle.com/datasets/lainguyn123/student-performance-factors/data
system("apt-get update")
## Warning in system("apt-get update"): 'apt-get' not found
## [1] 127
system("apt-get install -y libgsl-dev")
## Warning in system("apt-get install -y libgsl-dev"): 'apt-get' not found
## [1] 127
library(psych)
## Warning: package 'psych' was built under R version 4.5.3
library(MVN)
## Warning: package 'MVN' was built under R version 4.5.3
##
## Attaching package: 'MVN'
## The following object is masked from 'package:psych':
##
## mardia
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
## ---
## biotools version 4.3
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
data <- read.csv("StudentPerformanceFactors.csv")
head(data)
## Hours_Studied Attendance Parental_Involvement Access_to_Resources
## 1 23 84 Low High
## 2 19 64 Low Medium
## 3 24 98 Medium Medium
## 4 29 89 Low Medium
## 5 19 92 Medium Medium
## 6 19 88 Medium Medium
## Extracurricular_Activities Sleep_Hours Previous_Scores Motivation_Level
## 1 No 7 73 Low
## 2 No 8 59 Low
## 3 Yes 7 91 Medium
## 4 Yes 8 98 Medium
## 5 Yes 6 65 Medium
## 6 Yes 8 89 Medium
## Internet_Access Tutoring_Sessions Family_Income Teacher_Quality School_Type
## 1 Yes 0 Low Medium Public
## 2 Yes 2 Medium Medium Public
## 3 Yes 2 Medium Medium Public
## 4 Yes 1 Medium Medium Public
## 5 Yes 3 Medium High Public
## 6 Yes 3 Medium Medium Public
## Peer_Influence Physical_Activity Learning_Disabilities
## 1 Positive 3 No
## 2 Negative 4 No
## 3 Neutral 4 No
## 4 Negative 4 No
## 5 Neutral 4 No
## 6 Positive 3 No
## Parental_Education_Level Distance_from_Home Gender Exam_Score
## 1 High School Near Male 67
## 2 College Moderate Female 61
## 3 Postgraduate Near Male 74
## 4 High School Moderate Male 71
## 5 College Near Female 70
## 6 Postgraduate Near Male 71
colSums(is.na(data))
## Hours_Studied Attendance
## 0 0
## Parental_Involvement Access_to_Resources
## 0 0
## Extracurricular_Activities Sleep_Hours
## 0 0
## Previous_Scores Motivation_Level
## 0 0
## Internet_Access Tutoring_Sessions
## 0 0
## Family_Income Teacher_Quality
## 0 0
## School_Type Peer_Influence
## 0 0
## Physical_Activity Learning_Disabilities
## 0 0
## Parental_Education_Level Distance_from_Home
## 0 0
## Gender Exam_Score
## 0 0
summary(data[, c("Exam_Score", "Attendance", "Previous_Scores")])
## Exam_Score Attendance Previous_Scores
## Min. : 55.00 Min. : 60.00 Min. : 50.00
## 1st Qu.: 65.00 1st Qu.: 70.00 1st Qu.: 63.00
## Median : 67.00 Median : 80.00 Median : 75.00
## Mean : 67.24 Mean : 79.98 Mean : 75.07
## 3rd Qu.: 69.00 3rd Qu.: 90.00 3rd Qu.: 88.00
## Max. :101.00 Max. :100.00 Max. :100.00
selected_cols <- c("Exam_Score", "Attendance", "Parental_Education_Level",
"Motivation_Level", "Internet_Access", "Gender",
"School_Type", "Hours_Studied", "Previous_Scores")
df_full <- na.omit(data[, selected_cols])
set.seed(123)
df <- df_full[sample(nrow(df_full), 100), ]
row.names(df) <- NULL
dependent_vars <- df[, c("Exam_Score", "Attendance")]
dist_mahala <- mahalanobis(dependent_vars,
colMeans(dependent_vars),
cov(dependent_vars))
cutoff <- qchisq(p = 0.999, df = ncol(dependent_vars))
outliers <- which(dist_mahala > cutoff)
print(paste("Jumlah outlier yang terdeteksi:", length(outliers)))
## [1] "Jumlah outlier yang terdeteksi: 0"
par(mfrow=c(1,2))
boxplot(df$Exam_Score, main="Distribusi Exam Score", col="orange")
boxplot(df$Attendance, main="Distribusi Attendance", col="orange")
boxplot(df$Hours_Studied, main="Distribusi Hours Studied", col="lightblue")
boxplot(df$Previous_Scores, main="Distribusi Privious Scores", col="lightblue")
if(length(outliers) > 0) {
df_clean <- df[-outliers, ]
} else {
df_clean <- df
}
print(paste("Data awal:", nrow(df)))
## [1] "Data awal: 100"
print(paste("Data setelah dibersihkan:", nrow(df_clean)))
## [1] "Data setelah dibersihkan: 100"
df_clean
## Exam_Score Attendance Parental_Education_Level Motivation_Level
## 1 68 85 College Low
## 2 66 81 Postgraduate Medium
## 3 74 97 College Medium
## 4 63 79 High School High
## 5 71 89 Postgraduate High
## 6 67 78 College High
## 7 62 73 Postgraduate Medium
## 8 74 97 High School Low
## 9 67 88 High School Medium
## 10 63 67 College Medium
## 11 64 69 High School Medium
## 12 72 94 High School Low
## 13 59 64 College Medium
## 14 63 71 College Medium
## 15 63 60 High School High
## 16 69 79 Postgraduate Medium
## 17 68 97 College Medium
## 18 63 67 Postgraduate Low
## 19 71 94 Postgraduate Low
## 20 70 77 Postgraduate High
## 21 66 69 High School Medium
## 22 61 66 College Medium
## 23 68 89 Postgraduate Low
## 24 71 84 College Medium
## 25 64 62 High School High
## 26 64 76 High School Medium
## 27 69 92 High School Medium
## 28 73 94 College Medium
## 29 70 89 College Low
## 30 65 66 Postgraduate High
## 31 72 97 College High
## 32 65 84 High School Low
## 33 64 72 College Medium
## 34 70 97 College Low
## 35 69 79 Postgraduate Medium
## 36 69 64 Postgraduate Low
## 37 62 84 High School Low
## 38 68 77 High School Medium
## 39 68 67 College Medium
## 40 71 98 Postgraduate Medium
## 41 69 78 High School Medium
## 42 73 89 High School Medium
## 43 73 84 Postgraduate Low
## 44 66 77 High School High
## 45 65 81 High School Low
## 46 65 78 College Medium
## 47 69 92 High School Medium
## 48 67 87 High School Medium
## 49 68 87 College Medium
## 50 69 92 College Medium
## 51 67 76 Postgraduate Medium
## 52 65 94 High School Medium
## 53 66 89 College Low
## 54 61 61 College Medium
## 55 66 74 College Medium
## 56 68 94 College Medium
## 57 70 91 High School Low
## 58 66 71 College Medium
## 59 67 78 College High
## 60 68 76 High School Low
## 61 70 84 High School Low
## 62 72 88 High School Medium
## 63 75 91 College Low
## 64 67 68 College Low
## 65 65 96 High School Low
## 66 69 80 Postgraduate Medium
## 67 72 88 High School Low
## 68 68 87 Postgraduate Medium
## 69 66 74 High School Low
## 70 66 69 College Medium
## 71 67 62 College Medium
## 72 64 68 High School Medium
## 73 65 73 Postgraduate Medium
## 74 72 96 Postgraduate Medium
## 75 67 98 High School Medium
## 76 71 98 College Medium
## 77 69 94 High School Low
## 78 59 61 High School Medium
## 79 69 91 Postgraduate High
## 80 69 78 Medium
## 81 66 68 High School Low
## 82 68 81 College Low
## 83 63 89 High School Low
## 84 67 80 High School High
## 85 62 62 High School High
## 86 63 60 High School Low
## 87 72 90 High School High
## 88 65 67 College Low
## 89 65 70 High School Medium
## 90 63 73 Postgraduate Low
## 91 58 62 College Low
## 92 67 94 College Medium
## 93 65 67 High School Medium
## 94 69 79 Postgraduate Medium
## 95 71 87 Postgraduate Medium
## 96 66 66 High School High
## 97 67 81 College Low
## 98 65 68 High School Medium
## 99 61 61 College Medium
## 100 71 89 Postgraduate High
## Internet_Access Gender School_Type Hours_Studied Previous_Scores
## 1 No Male Public 23 66
## 2 Yes Female Public 23 74
## 3 Yes Female Public 29 86
## 4 Yes Male Public 9 90
## 5 Yes Male Public 23 88
## 6 Yes Female Public 14 69
## 7 No Male Private 9 95
## 8 Yes Female Public 28 93
## 9 No Male Public 20 50
## 10 Yes Male Private 8 92
## 11 Yes Female Public 14 71
## 12 Yes Male Public 23 76
## 13 Yes Female Public 13 88
## 14 Yes Male Public 12 100
## 15 Yes Female Public 17 50
## 16 Yes Male Public 21 87
## 17 Yes Male Public 14 73
## 18 Yes Female Private 21 70
## 19 Yes Female Private 12 91
## 20 Yes Male Private 24 65
## 21 Yes Female Private 16 85
## 22 Yes Female Public 15 53
## 23 Yes Male Public 19 65
## 24 Yes Male Public 28 81
## 25 No Male Public 23 91
## 26 Yes Male Public 14 71
## 27 Yes Female Public 20 89
## 28 Yes Male Private 21 94
## 29 Yes Male Private 28 71
## 30 No Male Public 16 68
## 31 Yes Male Public 21 71
## 32 Yes Male Public 20 96
## 33 Yes Male Private 22 71
## 34 Yes Male Public 23 80
## 35 Yes Male Public 21 99
## 36 Yes Male Private 28 84
## 37 Yes Male Private 17 71
## 38 Yes Female Private 26 90
## 39 Yes Male Public 26 94
## 40 Yes Male Public 21 88
## 41 Yes Male Public 33 86
## 42 Yes Female Private 24 99
## 43 Yes Female Public 31 98
## 44 Yes Male Public 19 86
## 45 No Female Public 15 68
## 46 Yes Male Public 8 84
## 47 Yes Female Public 17 66
## 48 No Male Public 22 68
## 49 Yes Male Private 15 52
## 50 Yes Female Public 23 72
## 51 Yes Male Public 17 71
## 52 Yes Female Private 10 65
## 53 Yes Male Public 15 82
## 54 Yes Male Public 20 63
## 55 Yes Male Public 29 64
## 56 Yes Male Public 15 64
## 57 Yes Female Public 27 70
## 58 No Female Public 27 51
## 59 Yes Male Public 18 85
## 60 Yes Female Private 23 96
## 61 Yes Male Public 25 84
## 62 Yes Female Public 21 93
## 63 Yes Female Public 30 95
## 64 Yes Male Public 24 56
## 65 Yes Male Public 5 72
## 66 Yes Male Private 20 81
## 67 Yes Female Public 25 95
## 68 Yes Male Public 20 75
## 69 Yes Male Private 16 74
## 70 Yes Female Private 18 85
## 71 Yes Male Public 26 75
## 72 Yes Male Private 12 75
## 73 Yes Male Public 15 63
## 74 No Male Private 26 63
## 75 Yes Female Private 12 54
## 76 Yes Female Private 15 61
## 77 Yes Female Public 24 75
## 78 Yes Female Private 8 78
## 79 Yes Male Public 19 59
## 80 Yes Female Public 19 89
## 81 Yes Female Public 20 76
## 82 Yes Male Public 23 59
## 83 No Female Private 14 67
## 84 Yes Female Private 20 86
## 85 Yes Male Public 14 97
## 86 Yes Male Public 22 92
## 87 Yes Male Public 27 96
## 88 Yes Female Private 20 59
## 89 Yes Male Public 24 71
## 90 No Male Private 15 61
## 91 Yes Male Public 7 51
## 92 Yes Female Private 14 71
## 93 Yes Male Private 13 76
## 94 Yes Male Private 16 91
## 95 Yes Female Public 23 76
## 96 Yes Female Public 20 95
## 97 Yes Female Public 22 80
## 98 Yes Female Public 19 66
## 99 Yes Female Public 19 67
## 100 Yes Male Public 25 82
str(df_clean)
## 'data.frame': 100 obs. of 9 variables:
## $ Exam_Score : int 68 66 74 63 71 67 62 74 67 63 ...
## $ Attendance : int 85 81 97 79 89 78 73 97 88 67 ...
## $ Parental_Education_Level: chr "College" "Postgraduate" "College" "High School" ...
## $ Motivation_Level : chr "Low" "Medium" "Medium" "High" ...
## $ Internet_Access : chr "No" "Yes" "Yes" "Yes" ...
## $ Gender : chr "Male" "Female" "Female" "Male" ...
## $ School_Type : chr "Public" "Public" "Public" "Public" ...
## $ Hours_Studied : int 23 23 29 9 23 14 9 28 20 8 ...
## $ Previous_Scores : int 66 74 86 90 88 69 95 93 50 92 ...
independen_cols <- c("Parental_Education_Level", "Motivation_Level",
"Internet_Access", "Gender", "School_Type")
for (col in independen_cols) {
df_clean <- df_clean[df_clean[[col]] != "", ]
}
df_clean[independen_cols] <- lapply(df_clean[independen_cols], as.factor)
df_clean <- droplevels(df_clean)
dependent_vars <- df_clean[, c("Exam_Score", "Attendance")]
kovariat <- df_clean[, c("Hours_Studied", "Previous_Scores")]
faktor1 <- df_clean$Parental_Education_Level
faktor2 <- df_clean$Motivation_Level
faktor3 <- df_clean$Internet_Access
faktor4 <- df_clean$Gender
faktor5 <- df_clean$School_Type
print(paste("Jumlah baris akhir:", nrow(df_clean)))
## [1] "Jumlah baris akhir: 99"
str(df_clean[independen_cols])
## 'data.frame': 99 obs. of 5 variables:
## $ Parental_Education_Level: Factor w/ 3 levels "College","High School",..: 1 3 1 2 3 1 3 2 2 1 ...
## $ Motivation_Level : Factor w/ 3 levels "High","Low","Medium": 2 3 3 1 1 1 3 2 3 3 ...
## $ Internet_Access : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 1 2 1 2 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 1 1 2 2 1 2 1 2 2 ...
## $ School_Type : Factor w/ 2 levels "Private","Public": 2 2 2 2 2 2 1 2 2 1 ...
df_clean
## Exam_Score Attendance Parental_Education_Level Motivation_Level
## 1 68 85 College Low
## 2 66 81 Postgraduate Medium
## 3 74 97 College Medium
## 4 63 79 High School High
## 5 71 89 Postgraduate High
## 6 67 78 College High
## 7 62 73 Postgraduate Medium
## 8 74 97 High School Low
## 9 67 88 High School Medium
## 10 63 67 College Medium
## 11 64 69 High School Medium
## 12 72 94 High School Low
## 13 59 64 College Medium
## 14 63 71 College Medium
## 15 63 60 High School High
## 16 69 79 Postgraduate Medium
## 17 68 97 College Medium
## 18 63 67 Postgraduate Low
## 19 71 94 Postgraduate Low
## 20 70 77 Postgraduate High
## 21 66 69 High School Medium
## 22 61 66 College Medium
## 23 68 89 Postgraduate Low
## 24 71 84 College Medium
## 25 64 62 High School High
## 26 64 76 High School Medium
## 27 69 92 High School Medium
## 28 73 94 College Medium
## 29 70 89 College Low
## 30 65 66 Postgraduate High
## 31 72 97 College High
## 32 65 84 High School Low
## 33 64 72 College Medium
## 34 70 97 College Low
## 35 69 79 Postgraduate Medium
## 36 69 64 Postgraduate Low
## 37 62 84 High School Low
## 38 68 77 High School Medium
## 39 68 67 College Medium
## 40 71 98 Postgraduate Medium
## 41 69 78 High School Medium
## 42 73 89 High School Medium
## 43 73 84 Postgraduate Low
## 44 66 77 High School High
## 45 65 81 High School Low
## 46 65 78 College Medium
## 47 69 92 High School Medium
## 48 67 87 High School Medium
## 49 68 87 College Medium
## 50 69 92 College Medium
## 51 67 76 Postgraduate Medium
## 52 65 94 High School Medium
## 53 66 89 College Low
## 54 61 61 College Medium
## 55 66 74 College Medium
## 56 68 94 College Medium
## 57 70 91 High School Low
## 58 66 71 College Medium
## 59 67 78 College High
## 60 68 76 High School Low
## 61 70 84 High School Low
## 62 72 88 High School Medium
## 63 75 91 College Low
## 64 67 68 College Low
## 65 65 96 High School Low
## 66 69 80 Postgraduate Medium
## 67 72 88 High School Low
## 68 68 87 Postgraduate Medium
## 69 66 74 High School Low
## 70 66 69 College Medium
## 71 67 62 College Medium
## 72 64 68 High School Medium
## 73 65 73 Postgraduate Medium
## 74 72 96 Postgraduate Medium
## 75 67 98 High School Medium
## 76 71 98 College Medium
## 77 69 94 High School Low
## 78 59 61 High School Medium
## 79 69 91 Postgraduate High
## 81 66 68 High School Low
## 82 68 81 College Low
## 83 63 89 High School Low
## 84 67 80 High School High
## 85 62 62 High School High
## 86 63 60 High School Low
## 87 72 90 High School High
## 88 65 67 College Low
## 89 65 70 High School Medium
## 90 63 73 Postgraduate Low
## 91 58 62 College Low
## 92 67 94 College Medium
## 93 65 67 High School Medium
## 94 69 79 Postgraduate Medium
## 95 71 87 Postgraduate Medium
## 96 66 66 High School High
## 97 67 81 College Low
## 98 65 68 High School Medium
## 99 61 61 College Medium
## 100 71 89 Postgraduate High
## Internet_Access Gender School_Type Hours_Studied Previous_Scores
## 1 No Male Public 23 66
## 2 Yes Female Public 23 74
## 3 Yes Female Public 29 86
## 4 Yes Male Public 9 90
## 5 Yes Male Public 23 88
## 6 Yes Female Public 14 69
## 7 No Male Private 9 95
## 8 Yes Female Public 28 93
## 9 No Male Public 20 50
## 10 Yes Male Private 8 92
## 11 Yes Female Public 14 71
## 12 Yes Male Public 23 76
## 13 Yes Female Public 13 88
## 14 Yes Male Public 12 100
## 15 Yes Female Public 17 50
## 16 Yes Male Public 21 87
## 17 Yes Male Public 14 73
## 18 Yes Female Private 21 70
## 19 Yes Female Private 12 91
## 20 Yes Male Private 24 65
## 21 Yes Female Private 16 85
## 22 Yes Female Public 15 53
## 23 Yes Male Public 19 65
## 24 Yes Male Public 28 81
## 25 No Male Public 23 91
## 26 Yes Male Public 14 71
## 27 Yes Female Public 20 89
## 28 Yes Male Private 21 94
## 29 Yes Male Private 28 71
## 30 No Male Public 16 68
## 31 Yes Male Public 21 71
## 32 Yes Male Public 20 96
## 33 Yes Male Private 22 71
## 34 Yes Male Public 23 80
## 35 Yes Male Public 21 99
## 36 Yes Male Private 28 84
## 37 Yes Male Private 17 71
## 38 Yes Female Private 26 90
## 39 Yes Male Public 26 94
## 40 Yes Male Public 21 88
## 41 Yes Male Public 33 86
## 42 Yes Female Private 24 99
## 43 Yes Female Public 31 98
## 44 Yes Male Public 19 86
## 45 No Female Public 15 68
## 46 Yes Male Public 8 84
## 47 Yes Female Public 17 66
## 48 No Male Public 22 68
## 49 Yes Male Private 15 52
## 50 Yes Female Public 23 72
## 51 Yes Male Public 17 71
## 52 Yes Female Private 10 65
## 53 Yes Male Public 15 82
## 54 Yes Male Public 20 63
## 55 Yes Male Public 29 64
## 56 Yes Male Public 15 64
## 57 Yes Female Public 27 70
## 58 No Female Public 27 51
## 59 Yes Male Public 18 85
## 60 Yes Female Private 23 96
## 61 Yes Male Public 25 84
## 62 Yes Female Public 21 93
## 63 Yes Female Public 30 95
## 64 Yes Male Public 24 56
## 65 Yes Male Public 5 72
## 66 Yes Male Private 20 81
## 67 Yes Female Public 25 95
## 68 Yes Male Public 20 75
## 69 Yes Male Private 16 74
## 70 Yes Female Private 18 85
## 71 Yes Male Public 26 75
## 72 Yes Male Private 12 75
## 73 Yes Male Public 15 63
## 74 No Male Private 26 63
## 75 Yes Female Private 12 54
## 76 Yes Female Private 15 61
## 77 Yes Female Public 24 75
## 78 Yes Female Private 8 78
## 79 Yes Male Public 19 59
## 81 Yes Female Public 20 76
## 82 Yes Male Public 23 59
## 83 No Female Private 14 67
## 84 Yes Female Private 20 86
## 85 Yes Male Public 14 97
## 86 Yes Male Public 22 92
## 87 Yes Male Public 27 96
## 88 Yes Female Private 20 59
## 89 Yes Male Public 24 71
## 90 No Male Private 15 61
## 91 Yes Male Public 7 51
## 92 Yes Female Private 14 71
## 93 Yes Male Private 13 76
## 94 Yes Male Private 16 91
## 95 Yes Female Public 23 76
## 96 Yes Female Public 20 95
## 97 Yes Female Public 22 80
## 98 Yes Female Public 19 66
## 99 Yes Female Public 19 67
## 100 Yes Male Public 25 82
Statistik Deskriptif
describe(df_clean[, c("Exam_Score", "Attendance", "Hours_Studied", "Previous_Scores")])
## vars n mean sd median trimmed mad min max range skew
## Exam_Score 1 99 67.08 3.57 67 67.14 2.97 58 75 17 -0.12
## Attendance 2 99 80.01 11.50 80 80.19 14.83 60 98 38 -0.10
## Hours_Studied 3 99 19.47 5.91 20 19.60 5.93 5 33 28 -0.18
## Previous_Scores 4 99 76.85 13.47 75 77.23 16.31 50 100 50 -0.12
## kurtosis se
## Exam_Score -0.38 0.36
## Attendance -1.27 1.16
## Hours_Studied -0.50 0.59
## Previous_Scores -0.99 1.35
Uji Normalitas Multivariat (Manova, Mancova)
mardia_result <- psych::mardia(dependent_vars, plot = FALSE)
print(mardia_result)
## Call: psych::mardia(x = dependent_vars, plot = FALSE)
##
## Mardia tests of multivariate skew and kurtosis
## Use describe(x) the to get univariate tests
## n.obs = 99 num.vars = 2
## b1p = 0.15 skew = 2.54 with probability <= 0.64
## small sample skew = 2.67 with probability <= 0.61
## b2p = 6.71 kurtosis = -1.6 with probability <= 0.11
Di uji satu per satu - normalitas univariat
shapiro_exam <- shapiro.test(df_clean$Exam_Score)
print("Uji Shapiro-Wilk: Exam Score")
## [1] "Uji Shapiro-Wilk: Exam Score"
print(shapiro_exam)
##
## Shapiro-Wilk normality test
##
## data: df_clean$Exam_Score
## W = 0.98781, p-value = 0.5019
shapiro_att <- shapiro.test(df_clean$Attendance)
print("Uji Shapiro-Wilk: Attendance")
## [1] "Uji Shapiro-Wilk: Attendance"
print(shapiro_att)
##
## Shapiro-Wilk normality test
##
## data: df_clean$Attendance
## W = 0.94474, p-value = 0.0004086
par(mfrow=c(1,2))
hist(df_clean$Exam_Score, main="Distribusi Exam Score", col="orange", xlab="Skor")
hist(df_clean$Attendance, main="Distribusi Attendance", col="lightblue", xlab="Kehadiran")
Uji Homogenitas (Manova, Mancova)
faktor_list <- c("Parental_Education_Level", "Motivation_Level",
"Internet_Access", "Gender", "School_Type")
for (f in faktor_list) {
cat("\n--- Uji Box's M untuk:", f, "---\n")
hasil_boxm <- biotools::boxM(dependent_vars, df_clean[[f]])
print(hasil_boxm)
}
##
## --- Uji Box's M untuk: Parental_Education_Level ---
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: dependent_vars
## Chi-Sq (approx.) = 4.4937, df = 6, p-value = 0.6102
##
##
## --- Uji Box's M untuk: Motivation_Level ---
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: dependent_vars
## Chi-Sq (approx.) = 7.4383, df = 6, p-value = 0.2822
##
##
## --- Uji Box's M untuk: Internet_Access ---
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: dependent_vars
## Chi-Sq (approx.) = 0.92746, df = 3, p-value = 0.8188
##
##
## --- Uji Box's M untuk: Gender ---
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: dependent_vars
## Chi-Sq (approx.) = 2.1866, df = 3, p-value = 0.5346
##
##
## --- Uji Box's M untuk: School_Type ---
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: dependent_vars
## Chi-Sq (approx.) = 3.0677, df = 3, p-value = 0.3813
Uji Dependensi (Manova, Mancova)
cortest.bartlett(cor(dependent_vars), n = nrow(dependent_vars))
## $chisq
## [1] 67.24761
##
## $p.value
## [1] 2.394605e-16
##
## $df
## [1] 1
Uji Linearitas (Ancova, Mancova)
nama_kovariat <- colnames(kovariat)
nama_dependen <- colnames(dependent_vars)
cat("UJI LINEARITAS\n")
## UJI LINEARITAS
for (dep in nama_dependen) {
cat("\n========================================\n")
cat("VARIABEL DEPENDEN:", dep, "\n")
cat("========================================\n")
for (kov in nama_kovariat) {
formula_linear <- as.formula(paste(dep, "~", kov))
model <- lm(formula_linear, data = df_clean)
p_val <- summary(model)$coefficients[2, 4]
cat("Kovariat:", kov, "\n")
cat("P-value:", p_val, "\n")
}
}
##
## ========================================
## VARIABEL DEPENDEN: Exam_Score
## ========================================
## Kovariat: Hours_Studied
## P-value: 4.052105e-12
## Kovariat: Previous_Scores
## P-value: 0.004606747
##
## ========================================
## VARIABEL DEPENDEN: Attendance
## ========================================
## Kovariat: Hours_Studied
## P-value: 0.1129201
## Kovariat: Previous_Scores
## P-value: 0.7756893
cat("UJI OBSERVASI INDEPENDENSI\n")
## UJI OBSERVASI INDEPENDENSI
# Exam Score
model_exam <- lm(Exam_Score ~ Hours_Studied + Previous_Scores +
Parental_Education_Level + Motivation_Level +
Internet_Access + Gender + School_Type, data = df_clean)
dw_exam <- durbinWatsonTest(model_exam)
cat("\nUji Durbin-Watson untuk Exam Score:\n")
##
## Uji Durbin-Watson untuk Exam Score:
print(dw_exam)
## lag Autocorrelation D-W Statistic p-value
## 1 0.007809499 1.979608 0.888
## Alternative hypothesis: rho != 0
# Attendance
model_att <- lm(Attendance ~ Hours_Studied + Previous_Scores +
Parental_Education_Level + Motivation_Level +
Internet_Access + Gender + School_Type, data = df_clean)
dw_att <- durbinWatsonTest(model_att)
cat("\nUji Durbin-Watson untuk Attendance:\n")
##
## Uji Durbin-Watson untuk Attendance:
print(dw_att)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.03732597 2.067247 0.724
## Alternative hypothesis: rho != 0
model_manova <- manova(
cbind(Exam_Score, Attendance) ~
Parental_Education_Level +
Motivation_Level +
Internet_Access +
Gender +
School_Type,
data = df_clean
)
summary(model_manova, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Parental_Education_Level 2 0.050929 1.18891 4 182 0.3172
## Motivation_Level 2 0.031603 0.73051 4 182 0.5722
## Internet_Access 1 0.044366 2.08915 2 90 0.1298
## Gender 1 0.008093 0.36716 2 90 0.6937
## School_Type 1 0.008931 0.40550 2 90 0.6679
## Residuals 91
summary.aov(model_manova)
## Response Exam_Score :
## Df Sum Sq Mean Sq F value Pr(>F)
## Parental_Education_Level 2 45.76 22.878 1.8267 0.1668
## Motivation_Level 2 9.95 4.973 0.3970 0.6735
## Internet_Access 1 35.09 35.088 2.8016 0.0976 .
## Gender 1 8.88 8.876 0.7087 0.4021
## School_Type 1 9.97 9.970 0.7961 0.3746
## Residuals 91 1139.72 12.524
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Attendance :
## Df Sum Sq Mean Sq F value Pr(>F)
## Parental_Education_Level 2 54.2 27.076 0.1959 0.8224
## Motivation_Level 2 245.3 122.633 0.8874 0.4153
## Internet_Access 1 18.3 18.268 0.1322 0.7170
## Gender 1 31.0 31.005 0.2244 0.6369
## School_Type 1 38.8 38.832 0.2810 0.5973
## Residuals 91 12575.5 138.192
TukeyHSD(aov(Exam_Score ~ Motivation_Level, data = df_clean))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Exam_Score ~ Motivation_Level, data = df_clean)
##
## $Motivation_Level
## diff lwr upr p adj
## Low-High 0.2641129 -2.371268 2.899494 0.9691191
## Medium-High -0.3605769 -2.808108 2.086954 0.9344948
## Medium-Low -0.6246898 -2.567328 1.317948 0.7249787
model_mancova <- manova(
cbind(Exam_Score, Attendance) ~
Parental_Education_Level + Motivation_Level + Internet_Access +
Gender + School_Type +
Hours_Studied + Previous_Scores,
data = data
)
summary(model_mancova, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Parental_Education_Level 3 0.98011 22.19 6 13190 < 2.2e-16 ***
## Motivation_Level 2 0.98003 33.42 4 13190 < 2.2e-16 ***
## Internet_Access 1 0.99068 31.02 2 6595 3.916e-14 ***
## Gender 1 0.99974 0.85 2 6595 0.4285
## School_Type 1 0.99967 1.07 2 6595 0.3414
## Hours_Studied 1 0.66490 1661.87 2 6595 < 2.2e-16 ***
## Previous_Scores 1 0.92902 251.94 2 6595 < 2.2e-16 ***
## Residuals 6596
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(car)
mancova_res <- Manova(model_mancova, test.statistic = "Pillai")
summary(mancova_res)
##
## Type II MANOVA Tests:
##
## Sum of squares and products for error:
## Exam_Score Attendance
## Exam_Score 75177.99 174191.3
## Attendance 174191.27 879023.0
##
## ------------------------------------------
##
## Term: Parental_Education_Level
##
## Sum of squares and products for the hypothesis:
## Exam_Score Attendance
## Exam_Score 1216.198 831.804
## Attendance 831.804 626.776
##
## Multivariate Tests: Parental_Education_Level
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 3 0.0226034 25.13273 6 13192 < 2.22e-16 ***
## Wilks 3 0.9773985 25.27201 6 13190 < 2.22e-16 ***
## Hotelling-Lawley 3 0.0231222 25.41126 6 13188 < 2.22e-16 ***
## Roy 3 0.0230367 50.64997 3 6596 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: Motivation_Level
##
## Sum of squares and products for the hypothesis:
## Exam_Score Attendance
## Exam_Score 820.57265 -89.13317
## Attendance -89.13317 139.39217
##
## Multivariate Tests: Motivation_Level
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 2 0.0209034 34.83373 4 13192 < 2.22e-16 ***
## Wilks 2 0.9790995 35.00938 4 13190 < 2.22e-16 ***
## Hotelling-Lawley 2 0.0213436 35.18498 4 13188 < 2.22e-16 ***
## Roy 2 0.0212032 69.92809 2 6596 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: Internet_Access
##
## Sum of squares and products for the hypothesis:
## Exam_Score Attendance
## Exam_Score 224.1282 -255.5875
## Attendance -255.5875 291.4625
##
## Multivariate Tests: Internet_Access
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.0085431 28.41373 2 6595 5.1634e-13 ***
## Wilks 1 0.9914569 28.41373 2 6595 5.1634e-13 ***
## Hotelling-Lawley 1 0.0086167 28.41373 2 6595 5.1634e-13 ***
## Roy 1 0.0086167 28.41373 2 6595 5.1634e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: Gender
##
## Sum of squares and products for the hypothesis:
## Exam_Score Attendance
## Exam_Score 0.4276813 -5.026164
## Attendance -5.0261645 59.068124
##
## Multivariate Tests: Gender
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.0001837 0.6059396 2 6595 0.54559
## Wilks 1 0.9998163 0.6059396 2 6595 0.54559
## Hotelling-Lawley 1 0.0001838 0.6059396 2 6595 0.54559
## Roy 1 0.0001838 0.6059396 2 6595 0.54559
##
## ------------------------------------------
##
## Term: School_Type
##
## Sum of squares and products for the hypothesis:
## Exam_Score Attendance
## Exam_Score 11.72061 56.26725
## Attendance 56.26725 270.12269
##
## Multivariate Tests: School_Type
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.0003079 1.015569 2 6595 0.36225
## Wilks 1 0.9996921 1.015569 2 6595 0.36225
## Hotelling-Lawley 1 0.0003080 1.015569 2 6595 0.36225
## Roy 1 0.0003080 1.015569 2 6595 0.36225
##
## ------------------------------------------
##
## Term: Hours_Studied
##
## Sum of squares and products for the hypothesis:
## Exam_Score Attendance
## Exam_Score 19578.082 -1247.31823
## Attendance -1247.318 79.46656
##
## Multivariate Tests: Hours_Studied
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.3305839 1628.435 2 6595 < 2.22e-16 ***
## Wilks 1 0.6694161 1628.435 2 6595 < 2.22e-16 ***
## Hotelling-Lawley 1 0.4938393 1628.435 2 6595 < 2.22e-16 ***
## Roy 1 0.4938393 1628.435 2 6595 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: Previous_Scores
##
## Sum of squares and products for the hypothesis:
## Exam_Score Attendance
## Exam_Score 2708.365 -934.9090
## Attendance -934.909 322.7241
##
## Multivariate Tests: Previous_Scores
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.0709798 251.9384 2 6595 < 2.22e-16 ***
## Wilks 1 0.9290202 251.9384 2 6595 < 2.22e-16 ***
## Hotelling-Lawley 1 0.0764029 251.9384 2 6595 < 2.22e-16 ***
## Roy 1 0.0764029 251.9384 2 6595 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SSPE <- mancova_res$SSPE
terms <- names(mancova_res$SSP)
df_h <- mancova_res$df
df_e <- mancova_res$error.df
p_dv <- nrow(SSPE)
tabel_mancova <- do.call(rbind, lapply(seq_along(terms), function(i) {
SSH <- mancova_res$SSP[[i]]
vh <- df_h[i]
ve <- df_e
pillai <- sum(diag(SSH %*% solve(SSH + SSPE)))
s <- min(p_dv, vh)
m <- (abs(p_dv - vh) - 1) / 2
nn <- (ve - p_dv - 1) / 2
approx_f <- ((2*nn + s + 1) / (2*m + s + 1)) * (pillai / (s - pillai))
num_df <- s * (2*m + s + 1)
den_df <- s * (2*nn + s + 1)
p_val <- pf(approx_f, num_df, den_df, lower.tail = FALSE)
data.frame(
Efek = terms[i],
Pillai = round(pillai, 4),
Approx_F = round(approx_f, 4),
Num_Df = round(num_df, 0),
Den_Df = round(den_df, 0),
p_value = round(p_val, 4)
)
}))
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
tabel_mancova |>
kable(
caption = "Tabel Hasil Uji MANCOVA (Pillai's Trace)",
col.names = c("Efek","Pillai's Trace","Approx F","Num Df","Den Df","p-value"),
align = "lccccc"
) |>
kable_styling(
bootstrap_options = c("striped","hover","bordered"),
full_width = TRUE
)
| Efek | Pillai’s Trace | Approx F | Num Df | Den Df | p-value | |
|---|---|---|---|---|---|---|
| Parental_Education_Level | Parental_Education_Level | 0.0226 | 25.1327 | 6 | 13192 | 0.0000 |
| Motivation_Level | Motivation_Level | 0.0209 | 34.8337 | 4 | 13192 | 0.0000 |
| Internet_Access | Internet_Access | 0.0085 | 28.4137 | 2 | 6595 | 0.0000 |
| Gender | Gender | 0.0002 | 0.6059 | 2 | 6595 | 0.5456 |
| School_Type | School_Type | 0.0003 | 1.0156 | 2 | 6595 | 0.3623 |
| Hours_Studied | Hours_Studied | 0.3306 | 1628.4350 | 2 | 6595 | 0.0000 |
| Previous_Scores | Previous_Scores | 0.0710 | 251.9384 | 2 | 6595 | 0.0000 |
library(effectsize)
## Warning: package 'effectsize' was built under R version 4.5.3
##
## Attaching package: 'effectsize'
## The following object is masked from 'package:psych':
##
## phi
# Exam Score
es_exam <- eta_squared(
lm(Exam_Score ~ Parental_Education_Level + Motivation_Level +
Internet_Access + Gender + School_Type +
Hours_Studied + Previous_Scores, data = data),
partial = TRUE
)
# Attendance
es_att <- eta_squared(
lm(Attendance ~ Parental_Education_Level + Motivation_Level +
Internet_Access + Gender + School_Type +
Hours_Studied + Previous_Scores, data = data),
partial = TRUE
)
cat("Exam_Score")
## Exam_Score
print(es_exam)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## --------------------------------------------------------
## Parental_Education_Level | 0.01 | [0.01, 1.00]
## Motivation_Level | 0.01 | [0.01, 1.00]
## Internet_Access | 3.30e-03 | [0.00, 1.00]
## Gender | 1.99e-05 | [0.00, 1.00]
## School_Type | 9.63e-05 | [0.00, 1.00]
## Hours_Studied | 0.21 | [0.20, 1.00]
## Previous_Scores | 0.03 | [0.03, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
cat("Attendance")
## Attendance
print(es_att)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## --------------------------------------------------------
## Parental_Education_Level | 7.43e-04 | [0.00, 1.00]
## Motivation_Level | 1.67e-04 | [0.00, 1.00]
## Internet_Access | 3.40e-04 | [0.00, 1.00]
## Gender | 6.90e-05 | [0.00, 1.00]
## School_Type | 3.17e-04 | [0.00, 1.00]
## Hours_Studied | 9.99e-05 | [0.00, 1.00]
## Previous_Scores | 3.67e-04 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
model_anova <- aov(Exam_Score ~ Parental_Education_Level + Motivation_Level +
Internet_Access + Gender + School_Type, data = df_clean)
result_anova <- Anova(model_anova, type = "III")
print(result_anova)
## Anova Table (Type III tests)
##
## Response: Exam_Score
## Sum Sq Df F value Pr(>F)
## (Intercept) 16203.7 1 1293.7765 < 2e-16 ***
## Parental_Education_Level 67.8 2 2.7062 0.07217 .
## Motivation_Level 10.4 2 0.4162 0.66079
## Internet_Access 30.4 1 2.4308 0.12244
## Gender 10.8 1 0.8634 0.35524
## School_Type 10.0 1 0.7961 0.37462
## Residuals 1139.7 91
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## Parental_Education_Level 2 45.8 22.88 1.827 0.1668
## Motivation_Level 2 9.9 4.97 0.397 0.6735
## Internet_Access 1 35.1 35.09 2.802 0.0976 .
## Gender 1 8.9 8.88 0.709 0.4021
## School_Type 1 10.0 9.97 0.796 0.3746
## Residuals 91 1139.7 12.52
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_ancova <- aov(Exam_Score ~ Hours_Studied + Previous_Scores +
Parental_Education_Level + Motivation_Level +
Internet_Access + Gender + School_Type,
data = df_clean)
result_ancova <- Anova(model_ancova, type="III")
print(result_ancova)
## Anova Table (Type III tests)
##
## Response: Exam_Score
## Sum Sq Df F value Pr(>F)
## (Intercept) 5615.6 1 742.6635 < 2.2e-16 ***
## Hours_Studied 386.9 1 51.1684 2.285e-10 ***
## Previous_Scores 24.9 1 3.2957 0.07283 .
## Parental_Education_Level 29.5 2 1.9492 0.14842
## Motivation_Level 0.3 2 0.0186 0.98154
## Internet_Access 12.6 1 1.6631 0.20052
## Gender 3.3 1 0.4338 0.51185
## School_Type 3.2 1 0.4212 0.51801
## Residuals 673.0 89
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.lm(model_ancova)
##
## Call:
## aov(formula = Exam_Score ~ Hours_Studied + Previous_Scores +
## Parental_Education_Level + Motivation_Level + Internet_Access +
## Gender + School_Type, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2621 -1.9055 0.1164 1.7400 5.7007
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.38210 2.06893 27.252 < 2e-16 ***
## Hours_Studied 0.35969 0.05028 7.153 2.28e-10 ***
## Previous_Scores 0.04052 0.02232 1.815 0.0728 .
## Parental_Education_LevelHigh School -0.27295 0.67559 -0.404 0.6872
## Parental_Education_LevelPostgraduate 1.17789 0.77371 1.522 0.1315
## Motivation_LevelLow -0.15860 0.87678 -0.181 0.8569
## Motivation_LevelMedium -0.14553 0.82244 -0.177 0.8600
## Internet_AccessYes 1.19580 0.92724 1.290 0.2005
## GenderMale -0.39334 0.59723 -0.659 0.5118
## School_TypePublic -0.41188 0.63463 -0.649 0.5180
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.75 on 89 degrees of freedom
## Multiple R-squared: 0.4614, Adjusted R-squared: 0.4069
## F-statistic: 8.47 on 9 and 89 DF, p-value: 4.843e-09