1 Library

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## ---
## biotools version 4.3
library(MVN) 
## Warning: package 'MVN' was built under R version 4.5.3
## Registered S3 method overwritten by 'lme4':
##   method           from
##   na.action.merMod car
library(corrplot)
## corrplot 0.95 loaded
# Read the student records from the CSV file, then preview the first six rows.
data <- read.csv(file = "student_data.csv", header = TRUE, sep = ",")
head(x = data, n = 6L)
##   school sex age address famsize Pstatus Medu Fedu     Mjob     Fjob     reason
## 1     GP   F  18       U     GT3       A    4    4  at_home  teacher     course
## 2     GP   F  17       U     GT3       T    1    1  at_home    other     course
## 3     GP   F  15       U     LE3       T    1    1  at_home    other      other
## 4     GP   F  15       U     GT3       T    4    2   health services       home
## 5     GP   F  16       U     GT3       T    3    3    other    other       home
## 6     GP   M  16       U     LE3       T    4    3 services    other reputation
##   guardian traveltime studytime failures schoolsup famsup paid activities
## 1   mother          2         2        0       yes     no   no         no
## 2   father          1         2        0        no    yes   no         no
## 3   mother          1         2        3       yes     no  yes         no
## 4   mother          1         3        0        no    yes  yes        yes
## 5   father          1         2        0        no    yes  yes         no
## 6   mother          1         2        0        no    yes  yes        yes
##   nursery higher internet romantic famrel freetime goout Dalc Walc health
## 1     yes    yes       no       no      4        3     4    1    1      3
## 2      no    yes      yes       no      5        3     3    1    1      3
## 3     yes    yes      yes       no      4        3     2    2    3      3
## 4     yes    yes      yes      yes      3        2     2    1    1      5
## 5     yes    yes       no       no      4        3     2    1    2      5
## 6     yes    yes      yes       no      5        4     2    1    2      5
##   absences G1 G2 G3
## 1        6  5  6  6
## 2        4  5  5  6
## 3       10  7  8 10
## 4        2 15 14 15
## 5        4  6 10 10
## 6       10 15 15 15
# Show the storage type and leading values of every column.
data |> str()
## 'data.frame':    395 obs. of  33 variables:
##  $ school    : chr  "GP" "GP" "GP" "GP" ...
##  $ sex       : chr  "F" "F" "F" "F" ...
##  $ age       : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address   : chr  "U" "U" "U" "U" ...
##  $ famsize   : chr  "GT3" "GT3" "LE3" "GT3" ...
##  $ Pstatus   : chr  "A" "T" "T" "T" ...
##  $ Medu      : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu      : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ Mjob      : chr  "at_home" "at_home" "at_home" "health" ...
##  $ Fjob      : chr  "teacher" "other" "other" "services" ...
##  $ reason    : chr  "course" "course" "other" "home" ...
##  $ guardian  : chr  "mother" "father" "mother" "mother" ...
##  $ traveltime: int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures  : int  0 0 3 0 0 0 0 0 0 0 ...
##  $ schoolsup : chr  "yes" "no" "yes" "no" ...
##  $ famsup    : chr  "no" "yes" "no" "yes" ...
##  $ paid      : chr  "no" "no" "yes" "yes" ...
##  $ activities: chr  "no" "no" "no" "yes" ...
##  $ nursery   : chr  "yes" "no" "yes" "yes" ...
##  $ higher    : chr  "yes" "yes" "yes" "yes" ...
##  $ internet  : chr  "no" "yes" "yes" "yes" ...
##  $ romantic  : chr  "no" "no" "no" "yes" ...
##  $ famrel    : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime  : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout     : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc      : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc      : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health    : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences  : int  6 4 10 2 4 10 0 6 0 0 ...
##  $ G1        : int  5 5 7 15 6 15 12 6 16 14 ...
##  $ G2        : int  6 5 8 14 10 15 12 5 18 15 ...
##  $ G3        : int  6 6 10 15 10 15 11 6 19 15 ...

2 Ubah Nama Kolom

# Rename the original (English) column names to Indonesian ones.
#
# The mapping is keyed by the ORIGINAL column name rather than by position, so
# a CSV whose columns arrive in a different order cannot end up with silently
# mislabelled variables. Each expected column must be present under either its
# old or its new name; otherwise the script stops. Columns that are already
# renamed are left alone, which keeps this chunk safe to re-run.
nama_kolom <- c(
  school = "sekolah",
  sex = "jenis_kelamin",
  age = "umur",
  address = "alamat",
  famsize = "jumlah_keluarga",
  Pstatus = "status_orang_tua",
  Medu = "pendidikan_ibu",
  Fedu = "pendidikan_ayah",
  Mjob = "pekerjaan_ibu",
  Fjob = "pekerjaan_ayah",
  reason = "alasan_sekolah",
  guardian = "wali",
  traveltime = "waktu_tempuh",
  studytime = "waktu_belajar",
  failures = "jumlah_gagal",
  schoolsup = "dukungan_sekolah",
  famsup = "dukungan_keluarga",
  paid = "les",
  activities = "aktivitas",
  nursery = "tk",
  higher = "ingin_kuliah",
  internet = "internet",
  romantic = "pacaran",
  famrel = "hubungan_keluarga",
  freetime = "waktu_luang",
  goout = "keluar",
  Dalc = "alkohol_harian",
  Walc = "alkohol_akhir_pekan",
  health = "kesehatan",
  absences = "absensi",
  G1 = "nilai_G1",
  G2 = "nilai_G2",
  G3 = "nilai_G3"
)

# A column counts as present if it exists under its old or its new name.
kolom_ada <- names(nama_kolom) %in% colnames(data) |
  nama_kolom %in% colnames(data)
if (!all(kolom_ada)) {
  stop(
    "Expected column(s) missing from data: ",
    paste(names(nama_kolom)[!kolom_ada], collapse = ", "),
    call. = FALSE
  )
}

# Rename only the columns that still carry their original name.
posisi <- match(names(nama_kolom), colnames(data))
perlu_ganti <- !is.na(posisi)
colnames(data)[posisi[perlu_ganti]] <- unname(nama_kolom[perlu_ganti])

3 Data Preprocessing

# Convert the four grouping variables to factors in one pass.
kolom_faktor <- c("jenis_kelamin", "sekolah", "alamat", "internet")
data[kolom_faktor] <- lapply(data[kolom_faktor], as.factor)

# Drop every row that has a missing value in any column, then summarise.
data <- na.omit(data)
data |> summary()
##  sekolah  jenis_kelamin      umur      alamat  jumlah_keluarga   
##  GP:349   F:208         Min.   :15.0   R: 88   Length:395        
##  MS: 46   M:187         1st Qu.:16.0   U:307   Class :character  
##                         Median :17.0           Mode  :character  
##                         Mean   :16.7                             
##                         3rd Qu.:18.0                             
##                         Max.   :22.0                             
##  status_orang_tua   pendidikan_ibu  pendidikan_ayah pekerjaan_ibu     
##  Length:395         Min.   :0.000   Min.   :0.000   Length:395        
##  Class :character   1st Qu.:2.000   1st Qu.:2.000   Class :character  
##  Mode  :character   Median :3.000   Median :2.000   Mode  :character  
##                     Mean   :2.749   Mean   :2.522                     
##                     3rd Qu.:4.000   3rd Qu.:3.000                     
##                     Max.   :4.000   Max.   :4.000                     
##  pekerjaan_ayah     alasan_sekolah         wali            waktu_tempuh  
##  Length:395         Length:395         Length:395         Min.   :1.000  
##  Class :character   Class :character   Class :character   1st Qu.:1.000  
##  Mode  :character   Mode  :character   Mode  :character   Median :1.000  
##                                                           Mean   :1.448  
##                                                           3rd Qu.:2.000  
##                                                           Max.   :4.000  
##  waktu_belajar    jumlah_gagal    dukungan_sekolah   dukungan_keluarga 
##  Min.   :1.000   Min.   :0.0000   Length:395         Length:395        
##  1st Qu.:1.000   1st Qu.:0.0000   Class :character   Class :character  
##  Median :2.000   Median :0.0000   Mode  :character   Mode  :character  
##  Mean   :2.035   Mean   :0.3342                                        
##  3rd Qu.:2.000   3rd Qu.:0.0000                                        
##  Max.   :4.000   Max.   :3.0000                                        
##      les             aktivitas              tk            ingin_kuliah      
##  Length:395         Length:395         Length:395         Length:395        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  internet    pacaran          hubungan_keluarga  waktu_luang        keluar     
##  no : 66   Length:395         Min.   :1.000     Min.   :1.000   Min.   :1.000  
##  yes:329   Class :character   1st Qu.:4.000     1st Qu.:3.000   1st Qu.:2.000  
##            Mode  :character   Median :4.000     Median :3.000   Median :3.000  
##                               Mean   :3.944     Mean   :3.235   Mean   :3.109  
##                               3rd Qu.:5.000     3rd Qu.:4.000   3rd Qu.:4.000  
##                               Max.   :5.000     Max.   :5.000   Max.   :5.000  
##  alkohol_harian  alkohol_akhir_pekan   kesehatan        absensi      
##  Min.   :1.000   Min.   :1.000       Min.   :1.000   Min.   : 0.000  
##  1st Qu.:1.000   1st Qu.:1.000       1st Qu.:3.000   1st Qu.: 0.000  
##  Median :1.000   Median :2.000       Median :4.000   Median : 4.000  
##  Mean   :1.481   Mean   :2.291       Mean   :3.554   Mean   : 5.709  
##  3rd Qu.:2.000   3rd Qu.:3.000       3rd Qu.:5.000   3rd Qu.: 8.000  
##  Max.   :5.000   Max.   :5.000       Max.   :5.000   Max.   :75.000  
##     nilai_G1        nilai_G2        nilai_G3    
##  Min.   : 3.00   Min.   : 0.00   Min.   : 0.00  
##  1st Qu.: 8.00   1st Qu.: 9.00   1st Qu.: 8.00  
##  Median :11.00   Median :11.00   Median :11.00  
##  Mean   :10.91   Mean   :10.71   Mean   :10.42  
##  3rd Qu.:13.00   3rd Qu.:13.00   3rd Qu.:14.00  
##  Max.   :19.00   Max.   :19.00   Max.   :20.00

4 Statistik Deskriptif

# Descriptive statistics for the three grade columns, study time and absences.
data[, c("nilai_G1", "nilai_G2", "nilai_G3", "waktu_belajar", "absensi")] |>
  summary()
##     nilai_G1        nilai_G2        nilai_G3     waktu_belajar  
##  Min.   : 3.00   Min.   : 0.00   Min.   : 0.00   Min.   :1.000  
##  1st Qu.: 8.00   1st Qu.: 9.00   1st Qu.: 8.00   1st Qu.:1.000  
##  Median :11.00   Median :11.00   Median :11.00   Median :2.000  
##  Mean   :10.91   Mean   :10.71   Mean   :10.42   Mean   :2.035  
##  3rd Qu.:13.00   3rd Qu.:13.00   3rd Qu.:14.00   3rd Qu.:2.000  
##  Max.   :19.00   Max.   :19.00   Max.   :20.00   Max.   :4.000  
##     absensi      
##  Min.   : 0.000  
##  1st Qu.: 0.000  
##  Median : 4.000  
##  Mean   : 5.709  
##  3rd Qu.: 8.000  
##  Max.   :75.000

5 Uji Asumsi

# 1. Univariate normality
# Shapiro-Wilk test on each response column separately, using all rows of
# `data` pooled together (no split by jenis_kelamin or sekolah).
# The recorded p-values for both columns are far below 0.05.
# NOTE(review): the models fitted later are multivariate, and package MVN is
# attached at the top of the file, but no MVN function is called in the
# visible code -- confirm whether a multivariate normality check (or a check
# on model residuals) was intended here.
cat("Uji Normalitas Univariat\n")
## Uji Normalitas Univariat
shapiro.test(data$nilai_G1)
## 
##  Shapiro-Wilk normality test
## 
## data:  data$nilai_G1
## W = 0.97491, p-value = 2.454e-06
shapiro.test(data$nilai_G2)
## 
##  Shapiro-Wilk normality test
## 
## data:  data$nilai_G2
## W = 0.96914, p-value = 2.084e-07
# 2. Homogeneity of covariance matrices
# Box's M test (presumably biotools::boxM, given the attached packages) on the
# two response columns, run once per grouping factor: first jenis_kelamin,
# then sekolah.
# NOTE(review): each factor is tested on its own; the jenis_kelamin x sekolah
# cells are not tested jointly -- confirm that this is intended.
cat("\nUji Homogenitas Kovarians (Box's M)\n")
## 
## Uji Homogenitas Kovarians (Box's M)
boxM(data[, c("nilai_G1","nilai_G2")], data$jenis_kelamin)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  data[, c("nilai_G1", "nilai_G2")]
## Chi-Sq (approx.) = 0.85882, df = 3, p-value = 0.8354
boxM(data[, c("nilai_G1","nilai_G2")], data$sekolah)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  data[, c("nilai_G1", "nilai_G2")]
## Chi-Sq (approx.) = 24.717, df = 3, p-value = 1.769e-05
# 3. Homogeneity of variance
# Levene's test (median-centred, per the recorded output) for each response
# separately. The groups are the cells of the jenis_kelamin x sekolah
# crossing, which gives the 3 group degrees of freedom shown in the output.
cat("\nUji Homogenitas Varians (Levene Test)\n")
## 
## Uji Homogenitas Varians (Levene Test)
leveneTest(nilai_G1 ~ jenis_kelamin * sekolah, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.2594  0.288
##       391
leveneTest(nilai_G2 ~ jenis_kelamin * sekolah, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.6057 0.6117
##       391
# 4. Dependence between the responses
# Pearson correlation test between the two response columns nilai_G1 and
# nilai_G2, i.e. whether the dependent variables are correlated with each
# other.
cat("\nUji Dependensi (Korelasi)\n")
## 
## Uji Dependensi (Korelasi)
cor.test(data$nilai_G1, data$nilai_G2)
## 
##  Pearson's product-moment correlation
## 
## data:  data$nilai_G1 and data$nilai_G2
## t = 32.278, df = 393, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8226117 0.8770475
## sample estimates:
##       cor 
## 0.8521181
# 5. Multicollinearity
# Variance inflation factors for the two numeric predictors waktu_belajar and
# absensi, taken from an auxiliary linear model with nilai_G1 as the response.
# NOTE(review): jenis_kelamin and sekolah are not part of this auxiliary
# model, so the VIFs only describe collinearity between the two covariates.
cat("\nUji Multikolinearitas (VIF)\n")
## 
## Uji Multikolinearitas (VIF)
model_lm <- lm(nilai_G1 ~ waktu_belajar + absensi, data = data)
vif(model_lm)
## waktu_belajar       absensi 
##      1.003947      1.003947
# MANOVA: joint effect of jenis_kelamin and sekolah on the response pair
# (nilai_G1, nilai_G2).
# NOTE(review): the group sizes in the summary above are unequal
# (F 208 / M 187, GP 349 / MS 46); if the tests are sequential, the order of
# the terms matters -- confirm the intended order.
model_manova <-
  manova(cbind(nilai_G1, nilai_G2) ~ jenis_kelamin + sekolah, data = data)

# Multivariate test for each term, reported as Wilks' lambda.
model_manova |> summary(test = "Wilks")
##                Df   Wilks approx F num Df den Df Pr(>F)
## jenis_kelamin   1 0.99095  1.78530      2    391 0.1691
## sekolah         1 0.99652  0.68302      2    391 0.5057
## Residuals     392
# Follow-up univariate ANOVA table for each response of the MANOVA fit.
model_manova |> summary.aov()
##  Response nilai_G1 :
##                Df Sum Sq Mean Sq F value  Pr(>F)  
## jenis_kelamin   1   36.6  36.611  3.3365 0.06852 .
## sekolah         1    2.6   2.628  0.2395 0.62486  
## Residuals     392 4301.5  10.973                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nilai_G2 :
##                Df Sum Sq Mean Sq F value  Pr(>F)  
## jenis_kelamin   1   46.3  46.265  3.2884 0.07053 .
## sekolah         1   13.4  13.368  0.9502 0.33027  
## Residuals     392 5515.0  14.069                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6 Analisis MANCOVA

# MANCOVA: joint test of jenis_kelamin and sekolah on (nilai_G1, nilai_G2),
# controlling for the covariates waktu_belajar and absensi.
#
# manova() / summary.manova() test the terms sequentially: each term is
# adjusted only for the terms written BEFORE it in the formula. The covariates
# therefore have to come first. With the factors listed first, their tests are
# not adjusted for the covariates at all: their sums of squares stay identical
# to the plain MANOVA and only the residual term shrinks.
#
# NOTE: the recorded output printed beneath this chunk was produced with the
# factors listed first; re-knit the document to refresh it.
model_mancova <- manova(
  cbind(nilai_G1, nilai_G2) ~
    waktu_belajar + absensi + jenis_kelamin + sekolah,
  data = data
)

# Multivariate test for each term, reported as Wilks' lambda.
summary(model_mancova, test = "Wilks")
##                Df   Wilks approx F num Df den Df    Pr(>F)    
## jenis_kelamin   1 0.99061   1.8440      2    389 0.1595657    
## sekolah         1 0.99649   0.6847      2    389 0.5048647    
## waktu_belajar   1 0.96091   7.9116      2    389 0.0004286 ***
## absensi         1 0.99975   0.0490      2    389 0.9522323    
## Residuals     390                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Follow-up univariate ANOVA table for each response of the MANCOVA fit.
model_mancova |> summary.aov()
##  Response nilai_G1 :
##                Df Sum Sq Mean Sq F value    Pr(>F)    
## jenis_kelamin   1   36.6  36.611  3.4547   0.06383 .  
## sekolah         1    2.6   2.628  0.2480   0.61880    
## waktu_belajar   1  168.1 168.112 15.8631 8.124e-05 ***
## absensi         1    0.3   0.285  0.0269   0.86987    
## Residuals     390 4133.1  10.598                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nilai_G2 :
##                Df Sum Sq Mean Sq F value    Pr(>F)    
## jenis_kelamin   1   46.3  46.265  3.3684 0.0672183 .  
## sekolah         1   13.4  13.368  0.9733 0.3244641    
## waktu_belajar   1  157.4 157.375 11.4581 0.0007841 ***
## absensi         1    1.1   1.084  0.0790 0.7788629    
## Residuals     390 5356.6  13.735                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7 Visualisasi

7.1 Scatter Plot

# Scatter plot of the final grade (nilai_G3) against study time
# (waktu_belajar) with a fitted linear trend.
#
# waktu_belajar only takes the values 1-4 and the grades are integers, so a
# plain geom_point() stacks many students on the same coordinates and hides
# how dense each position is. A small horizontal jitter with a fixed seed,
# plus transparency, makes the density visible while keeping the figure
# reproducible. Grades are not jittered vertically, so every point still sits
# at its true grade.
#
# The smoother is its own layer and is fitted to the unjittered data. Its
# formula is given explicitly so ggplot2 does not print the
# "using formula = 'y ~ x'" message.
ggplot(data, aes(x = waktu_belajar, y = nilai_G3)) +
  geom_point(
    alpha = 0.4,
    position = position_jitter(width = 0.15, height = 0, seed = 1)
  ) +
  geom_smooth(method = "lm", formula = y ~ x) +
  ggtitle("Pengaruh Waktu Belajar terhadap Nilai")
## `geom_smooth()` using formula = 'y ~ x'

# Correlation matrix of the two responses and the two covariates.
data[, c("nilai_G1", "nilai_G2", "waktu_belajar", "absensi")] |>
  cor()
##                 nilai_G1   nilai_G2 waktu_belajar     absensi
## nilai_G1       1.0000000  0.8521181    0.16061192 -0.03100290
## nilai_G2       0.8521181  1.0000000    0.13588000 -0.03177670
## waktu_belajar  0.1606119  0.1358800    1.00000000 -0.06270018
## absensi       -0.0310029 -0.0317767   -0.06270018  1.00000000

8 Heatmap Korelasi

# Correlation matrix of the three grades, study time and absences.
cor_matrix <-
  data[, c("nilai_G1", "nilai_G2", "nilai_G3", "waktu_belajar", "absensi")] |>
  cor()

# Draw the upper triangle of the matrix as colour-coded tiles.
corrplot(corr = cor_matrix, method = "color", type = "upper")

# Boxplot of the first-period grade (nilai_G1) for each level of
# jenis_kelamin.
ggplot(data = data, mapping = aes(x = jenis_kelamin, y = nilai_G1)) +
  geom_boxplot() +
  labs(title = "Nilai G1 berdasarkan Jenis Kelamin")