Kelas 2024B
Maris Al Sabina A. (24031554188)
Dewanggi Erchinta Dwi Putri (24031554034)
Naila Alya Furqon (24031554069)
Nuri Maulidyatul Haliza (24031554160)
Dosen Pengampu: Dinda Galuh Guminta, M.Stat.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## ---
## biotools version 4.3
library(MVN)
## Warning: package 'MVN' was built under R version 4.5.3
## Registered S3 method overwritten by 'lme4':
## method from
## na.action.merMod car
library(corrplot)
## corrplot 0.95 loaded
# Read the student dataset from the working directory and preview its first rows.
data <- utils::read.csv(file = "student_data.csv")
head(x = data, n = 6L)
## school sex age address famsize Pstatus Medu Fedu Mjob Fjob reason
## 1 GP F 18 U GT3 A 4 4 at_home teacher course
## 2 GP F 17 U GT3 T 1 1 at_home other course
## 3 GP F 15 U LE3 T 1 1 at_home other other
## 4 GP F 15 U GT3 T 4 2 health services home
## 5 GP F 16 U GT3 T 3 3 other other home
## 6 GP M 16 U LE3 T 4 3 services other reputation
## guardian traveltime studytime failures schoolsup famsup paid activities
## 1 mother 2 2 0 yes no no no
## 2 father 1 2 0 no yes no no
## 3 mother 1 2 3 yes no yes no
## 4 mother 1 3 0 no yes yes yes
## 5 father 1 2 0 no yes yes no
## 6 mother 1 2 0 no yes yes yes
## nursery higher internet romantic famrel freetime goout Dalc Walc health
## 1 yes yes no no 4 3 4 1 1 3
## 2 no yes yes no 5 3 3 1 1 3
## 3 yes yes yes no 4 3 2 2 3 3
## 4 yes yes yes yes 3 2 2 1 1 5
## 5 yes yes no no 4 3 2 1 2 5
## 6 yes yes yes no 5 4 2 1 2 5
## absences G1 G2 G3
## 1 6 5 6 6
## 2 4 5 5 6
## 3 10 7 8 10
## 4 2 15 14 15
## 5 4 6 10 10
## 6 10 15 15 15
# Show the type and leading values of every column.
utils::str(object = data)
## 'data.frame': 395 obs. of 33 variables:
## $ school : chr "GP" "GP" "GP" "GP" ...
## $ sex : chr "F" "F" "F" "F" ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : chr "U" "U" "U" "U" ...
## $ famsize : chr "GT3" "GT3" "LE3" "GT3" ...
## $ Pstatus : chr "A" "T" "T" "T" ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mjob : chr "at_home" "at_home" "at_home" "health" ...
## $ Fjob : chr "teacher" "other" "other" "services" ...
## $ reason : chr "course" "course" "other" "home" ...
## $ guardian : chr "mother" "father" "mother" "mother" ...
## $ traveltime: int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : chr "yes" "no" "yes" "no" ...
## $ famsup : chr "no" "yes" "no" "yes" ...
## $ paid : chr "no" "no" "yes" "yes" ...
## $ activities: chr "no" "no" "no" "yes" ...
## $ nursery : chr "yes" "no" "yes" "yes" ...
## $ higher : chr "yes" "yes" "yes" "yes" ...
## $ internet : chr "no" "yes" "yes" "yes" ...
## $ romantic : chr "no" "no" "no" "yes" ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ G1 : int 5 5 7 15 6 15 12 6 16 14 ...
## $ G2 : int 6 5 8 14 10 15 12 5 18 15 ...
## $ G3 : int 6 6 10 15 10 15 11 6 19 15 ...
# Rename the dataset's original column names to Indonesian ones.
# The mapping is keyed by the original name (old = "new") instead of by
# position, so a CSV whose columns are reordered or missing stops with an
# error rather than being silently mislabelled.
nama_kolom <- c(
  school = "sekolah",
  sex = "jenis_kelamin",
  age = "umur",
  address = "alamat",
  famsize = "jumlah_keluarga",
  Pstatus = "status_orang_tua",
  Medu = "pendidikan_ibu",
  Fedu = "pendidikan_ayah",
  Mjob = "pekerjaan_ibu",
  Fjob = "pekerjaan_ayah",
  reason = "alasan_sekolah",
  guardian = "wali",
  traveltime = "waktu_tempuh",
  studytime = "waktu_belajar",
  failures = "jumlah_gagal",
  schoolsup = "dukungan_sekolah",
  famsup = "dukungan_keluarga",
  paid = "les",
  activities = "aktivitas",
  nursery = "tk",
  higher = "ingin_kuliah",
  internet = "internet",
  romantic = "pacaran",
  famrel = "hubungan_keluarga",
  freetime = "waktu_luang",
  goout = "keluar",
  Dalc = "alkohol_harian",
  Walc = "alkohol_akhir_pekan",
  health = "kesehatan",
  absences = "absensi",
  G1 = "nilai_G1",
  G2 = "nilai_G2",
  G3 = "nilai_G3"
)

# Fail loudly if the file does not contain every expected column.
kolom_hilang <- setdiff(names(nama_kolom), colnames(data))
if (length(kolom_hilang) > 0) {
  stop(
    "Kolom tidak ditemukan pada data: ",
    paste(kolom_hilang, collapse = ", "),
    call. = FALSE
  )
}

# Replace each original name in place; column order is left as it is.
colnames(data)[match(names(nama_kolom), colnames(data))] <- unname(nama_kolom)

# Convert four categorical columns to factors.
kolom_faktor <- c("jenis_kelamin", "sekolah", "alamat", "internet")
data[kolom_faktor] <- lapply(data[kolom_faktor], as.factor)

# Drop rows containing missing values, then summarise every column.
data <- na.omit(data)
summary(data)
## sekolah jenis_kelamin umur alamat jumlah_keluarga
## GP:349 F:208 Min. :15.0 R: 88 Length:395
## MS: 46 M:187 1st Qu.:16.0 U:307 Class :character
## Median :17.0 Mode :character
## Mean :16.7
## 3rd Qu.:18.0
## Max. :22.0
## status_orang_tua pendidikan_ibu pendidikan_ayah pekerjaan_ibu
## Length:395 Min. :0.000 Min. :0.000 Length:395
## Class :character 1st Qu.:2.000 1st Qu.:2.000 Class :character
## Mode :character Median :3.000 Median :2.000 Mode :character
## Mean :2.749 Mean :2.522
## 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000
## pekerjaan_ayah alasan_sekolah wali waktu_tempuh
## Length:395 Length:395 Length:395 Min. :1.000
## Class :character Class :character Class :character 1st Qu.:1.000
## Mode :character Mode :character Mode :character Median :1.000
## Mean :1.448
## 3rd Qu.:2.000
## Max. :4.000
## waktu_belajar jumlah_gagal dukungan_sekolah dukungan_keluarga
## Min. :1.000 Min. :0.0000 Length:395 Length:395
## 1st Qu.:1.000 1st Qu.:0.0000 Class :character Class :character
## Median :2.000 Median :0.0000 Mode :character Mode :character
## Mean :2.035 Mean :0.3342
## 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :4.000 Max. :3.0000
## les aktivitas tk ingin_kuliah
## Length:395 Length:395 Length:395 Length:395
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## internet pacaran hubungan_keluarga waktu_luang keluar
## no : 66 Length:395 Min. :1.000 Min. :1.000 Min. :1.000
## yes:329 Class :character 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:2.000
## Mode :character Median :4.000 Median :3.000 Median :3.000
## Mean :3.944 Mean :3.235 Mean :3.109
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000
## alkohol_harian alkohol_akhir_pekan kesehatan absensi
## Min. :1.000 Min. :1.000 Min. :1.000 Min. : 0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:3.000 1st Qu.: 0.000
## Median :1.000 Median :2.000 Median :4.000 Median : 4.000
## Mean :1.481 Mean :2.291 Mean :3.554 Mean : 5.709
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.: 8.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :75.000
## nilai_G1 nilai_G2 nilai_G3
## Min. : 3.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 8.00 1st Qu.: 9.00 1st Qu.: 8.00
## Median :11.00 Median :11.00 Median :11.00
## Mean :10.91 Mean :10.71 Mean :10.42
## 3rd Qu.:13.00 3rd Qu.:13.00 3rd Qu.:14.00
## Max. :19.00 Max. :19.00 Max. :20.00
# Summary statistics restricted to the three grade columns, waktu_belajar
# and absensi.
summary(
  object = data[
    c("nilai_G1", "nilai_G2", "nilai_G3", "waktu_belajar", "absensi")
  ]
)
## nilai_G1 nilai_G2 nilai_G3 waktu_belajar
## Min. : 3.00 Min. : 0.00 Min. : 0.00 Min. :1.000
## 1st Qu.: 8.00 1st Qu.: 9.00 1st Qu.: 8.00 1st Qu.:1.000
## Median :11.00 Median :11.00 Median :11.00 Median :2.000
## Mean :10.91 Mean :10.71 Mean :10.42 Mean :2.035
## 3rd Qu.:13.00 3rd Qu.:13.00 3rd Qu.:14.00 3rd Qu.:2.000
## Max. :19.00 Max. :19.00 Max. :20.00 Max. :4.000
## absensi
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 4.000
## Mean : 5.709
## 3rd Qu.: 8.000
## Max. :75.000
# 1. Univariate normality ----
cat("Uji Normalitas Univariat\n", sep = "")
## Uji Normalitas Univariat
# Shapiro-Wilk normality test on nilai_G1. The argument is kept as
# data$nilai_G1 because the printed "data:" label echoes this expression.
shapiro.test(data$nilai_G1)
##
## Shapiro-Wilk normality test
##
## data: data$nilai_G1
## W = 0.97491, p-value = 2.454e-06
# Shapiro-Wilk normality test on nilai_G2. The argument is kept as
# data$nilai_G2 because the printed "data:" label echoes this expression.
shapiro.test(data$nilai_G2)
##
## Shapiro-Wilk normality test
##
## data: data$nilai_G2
## W = 0.96914, p-value = 2.084e-07
# 2. Homogeneity of covariance matrices ----
cat("\nUji Homogenitas Kovarians (Box's M)\n", sep = "")
##
## Uji Homogenitas Kovarians (Box's M)
# Box's M test: are the covariance matrices of (nilai_G1, nilai_G2) equal
# across the jenis_kelamin groups? The first argument is echoed verbatim in
# the printed "data:" label.
boxM(data[, c("nilai_G1","nilai_G2")], data$jenis_kelamin)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data[, c("nilai_G1", "nilai_G2")]
## Chi-Sq (approx.) = 0.85882, df = 3, p-value = 0.8354
# Box's M test: are the covariance matrices of (nilai_G1, nilai_G2) equal
# across the sekolah groups? The first argument is echoed verbatim in the
# printed "data:" label.
# NOTE(review): the result recorded for this call rejects equality
# (p = 1.769e-05) and the sekolah groups are very unequal in size (349 vs 46).
# Consider whether Wilks' lambda is still the appropriate MANOVA statistic or
# whether Pillai's trace should be reported instead -- confirm with the
# course's guidance.
boxM(data[, c("nilai_G1","nilai_G2")], data$sekolah)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data[, c("nilai_G1", "nilai_G2")]
## Chi-Sq (approx.) = 24.717, df = 3, p-value = 1.769e-05
# 3. Homogeneity of variance ----
cat("\nUji Homogenitas Varians (Levene Test)\n", sep = "")
##
## Uji Homogenitas Varians (Levene Test)
# Levene's test of equal nilai_G1 variance across the four
# jenis_kelamin x sekolah cells.
car::leveneTest(
  nilai_G1 ~ jenis_kelamin * sekolah,
  data = data
)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.2594 0.288
## 391
# Levene's test of equal nilai_G2 variance across the four
# jenis_kelamin x sekolah cells.
car::leveneTest(
  nilai_G2 ~ jenis_kelamin * sekolah,
  data = data
)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.6057 0.6117
## 391
# 4. Dependence between the response variables ----
cat("\nUji Dependensi (Korelasi)\n", sep = "")
##
## Uji Dependensi (Korelasi)
# Correlation test between the two response variables nilai_G1 and nilai_G2.
# Both arguments are echoed verbatim in the printed "data:" label.
cor.test(data$nilai_G1, data$nilai_G2)
##
## Pearson's product-moment correlation
##
## data: data$nilai_G1 and data$nilai_G2
## t = 32.278, df = 393, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8226117 0.8770475
## sample estimates:
## cor
## 0.8521181
# 5. Multicollinearity ----
cat("\nUji Multikolinearitas (VIF)\n", sep = "")
##
## Uji Multikolinearitas (VIF)
# Fit an auxiliary linear model of nilai_G1 on the two covariates and report
# the variance inflation factor of each covariate.
model_lm <- lm(
  formula = nilai_G1 ~ waktu_belajar + absensi,
  data = data
)
vif(mod = model_lm)
## waktu_belajar absensi
## 1.003947 1.003947
# MANOVA analysis
# Two-response model: (nilai_G1, nilai_G2) explained by the additive effects
# of jenis_kelamin and sekolah (no interaction term), tested with Wilks' lambda.
# NOTE(review): summary() on a manova fit tests terms sequentially, so with
# unequal group sizes the result for each factor can depend on the order in
# which the factors are listed -- confirm the order is intentional.
model_manova <- manova(
cbind(nilai_G1, nilai_G2) ~ jenis_kelamin + sekolah,
data = data
)
summary(model_manova, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## jenis_kelamin 1 0.99095 1.78530 2 391 0.1691
## sekolah 1 0.99652 0.68302 2 391 0.5057
## Residuals 392
# Follow-up univariate ANOVA tables, one per response of the MANOVA fit.
stats::summary.aov(object = model_manova)
## Response nilai_G1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## jenis_kelamin 1 36.6 36.611 3.3365 0.06852 .
## sekolah 1 2.6 2.628 0.2395 0.62486
## Residuals 392 4301.5 10.973
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nilai_G2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## jenis_kelamin 1 46.3 46.265 3.2884 0.07053 .
## sekolah 1 13.4 13.368 0.9502 0.33027
## Residuals 392 5515.0 14.069
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# MANCOVA: effect of jenis_kelamin and sekolah on (nilai_G1, nilai_G2) after
# adjusting for the covariates waktu_belajar and absensi.
#
# summary() on a manova fit reports sequential tests: each term is adjusted
# only for the terms listed before it. The covariates are therefore entered
# first, so that the factor tests are adjusted for them. With the factors
# listed first, their sums of squares were not adjusted for the covariates.
#
# NOTE: any results recorded below this call in the document were produced
# with the factors listed first; re-run the script to refresh them.
model_mancova <- manova(
  cbind(nilai_G1, nilai_G2) ~
    waktu_belajar + absensi + jenis_kelamin + sekolah,
  data = data
)
summary(model_mancova, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## jenis_kelamin 1 0.99061 1.8440 2 389 0.1595657
## sekolah 1 0.99649 0.6847 2 389 0.5048647
## waktu_belajar 1 0.96091 7.9116 2 389 0.0004286 ***
## absensi 1 0.99975 0.0490 2 389 0.9522323
## Residuals 390
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Follow-up univariate ANOVA tables, one per response of the MANCOVA fit.
stats::summary.aov(object = model_mancova)
## Response nilai_G1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## jenis_kelamin 1 36.6 36.611 3.4547 0.06383 .
## sekolah 1 2.6 2.628 0.2480 0.61880
## waktu_belajar 1 168.1 168.112 15.8631 8.124e-05 ***
## absensi 1 0.3 0.285 0.0269 0.86987
## Residuals 390 4133.1 10.598
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nilai_G2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## jenis_kelamin 1 46.3 46.265 3.3684 0.0672183 .
## sekolah 1 13.4 13.368 0.9733 0.3244641
## waktu_belajar 1 157.4 157.375 11.4581 0.0007841 ***
## absensi 1 1.1 1.084 0.0790 0.7788629
## Residuals 390 5356.6 13.735
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Scatter plot of nilai_G3 against waktu_belajar with a fitted linear trend.
ggplot(
  data = data,
  mapping = aes(x = waktu_belajar, y = nilai_G3)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle(label = "Pengaruh Waktu Belajar terhadap Nilai")
## `geom_smooth()` using formula = 'y ~ x'
# Correlation matrix of the two analysed grades and the two covariates.
cor(
  x = data[c("nilai_G1", "nilai_G2", "waktu_belajar", "absensi")]
)
## nilai_G1 nilai_G2 waktu_belajar absensi
## nilai_G1 1.0000000 0.8521181 0.16061192 -0.03100290
## nilai_G2 0.8521181 1.0000000 0.13588000 -0.03177670
## waktu_belajar 0.1606119 0.1358800 1.00000000 -0.06270018
## absensi -0.0310029 -0.0317767 -0.06270018 1.00000000
# Correlation matrix that also includes nilai_G3, drawn as a colour-coded
# upper triangle.
cor_matrix <- cor(
  x = data[c("nilai_G1", "nilai_G2", "nilai_G3", "waktu_belajar", "absensi")]
)
corrplot(corr = cor_matrix, method = "color", type = "upper")
# Box plot of nilai_G1 for each jenis_kelamin group.
ggplot(
  data = data,
  mapping = aes(x = jenis_kelamin, y = nilai_G1)
) +
  geom_boxplot() +
  ggtitle(label = "Nilai G1 berdasarkan Jenis Kelamin")