Load Library

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(readr)

MENGINPUT DATA DAN GOALS

Goals:

Input Data

# Load the two course datasets (mathematics and Portuguese).
# Both files use ";" as the field separator.
nilaimat <- read.csv(file = "student-mat.csv", sep = ";")
nilaipor <- read.csv(file = "student-por.csv", sep = ";")

# Preview the first rows of the mathematics data.
nilaimat %>% head()
##   school sex age address famsize Pstatus Medu Fedu     Mjob     Fjob     reason
## 1     GP   F  18       U     GT3       A    4    4  at_home  teacher     course
## 2     GP   F  17       U     GT3       T    1    1  at_home    other     course
## 3     GP   F  15       U     LE3       T    1    1  at_home    other      other
## 4     GP   F  15       U     GT3       T    4    2   health services       home
## 5     GP   F  16       U     GT3       T    3    3    other    other       home
## 6     GP   M  16       U     LE3       T    4    3 services    other reputation
##   guardian traveltime studytime failures schoolsup famsup paid activities
## 1   mother          2         2        0       yes     no   no         no
## 2   father          1         2        0        no    yes   no         no
## 3   mother          1         2        3       yes     no  yes         no
## 4   mother          1         3        0        no    yes  yes        yes
## 5   father          1         2        0        no    yes  yes         no
## 6   mother          1         2        0        no    yes  yes        yes
##   nursery higher internet romantic famrel freetime goout Dalc Walc health
## 1     yes    yes       no       no      4        3     4    1    1      3
## 2      no    yes      yes       no      5        3     3    1    1      3
## 3     yes    yes      yes       no      4        3     2    2    3      3
## 4     yes    yes      yes      yes      3        2     2    1    1      5
## 5     yes    yes       no       no      4        3     2    1    2      5
## 6     yes    yes      yes       no      5        4     2    1    2      5
##   absences G1 G2 G3
## 1        6  5  6  6
## 2        4  5  5  6
## 3       10  7  8 10
## 4        2 15 14 15
## 5        4  6 10 10
## 6       10 15 15 15
# Preview the first rows of the Portuguese data.
nilaipor %>% head()
##   school sex age address famsize Pstatus Medu Fedu     Mjob     Fjob     reason
## 1     GP   F  18       U     GT3       A    4    4  at_home  teacher     course
## 2     GP   F  17       U     GT3       T    1    1  at_home    other     course
## 3     GP   F  15       U     LE3       T    1    1  at_home    other      other
## 4     GP   F  15       U     GT3       T    4    2   health services       home
## 5     GP   F  16       U     GT3       T    3    3    other    other       home
## 6     GP   M  16       U     LE3       T    4    3 services    other reputation
##   guardian traveltime studytime failures schoolsup famsup paid activities
## 1   mother          2         2        0       yes     no   no         no
## 2   father          1         2        0        no    yes   no         no
## 3   mother          1         2        0       yes     no   no         no
## 4   mother          1         3        0        no    yes   no        yes
## 5   father          1         2        0        no    yes   no         no
## 6   mother          1         2        0        no    yes   no        yes
##   nursery higher internet romantic famrel freetime goout Dalc Walc health
## 1     yes    yes       no       no      4        3     4    1    1      3
## 2      no    yes      yes       no      5        3     3    1    1      3
## 3     yes    yes      yes       no      4        3     2    2    3      3
## 4     yes    yes      yes      yes      3        2     2    1    1      5
## 5     yes    yes       no       no      4        3     2    1    2      5
## 6     yes    yes      yes       no      5        4     2    1    2      5
##   absences G1 G2 G3
## 1        4  0 11 11
## 2        2  9 11 11
## 3        6 12 13 12
## 4        0 14 14 14
## 5        0 11 13 13
## 6        6 12 12 13

DATA CLEANING

Cek jumlah baris dan kolom

# Row and column counts of each raw dataset.
nilaimat %>% dim()
## [1] 395  33
nilaipor %>% dim()
## [1] 649  33

Cek missing value

# Number of missing values in each column of the mathematics data.
nilaimat %>%
  is.na() %>%
  colSums()
##     school        sex        age    address    famsize    Pstatus       Medu 
##          0          0          0          0          0          0          0 
##       Fedu       Mjob       Fjob     reason   guardian traveltime  studytime 
##          0          0          0          0          0          0          0 
##   failures  schoolsup     famsup       paid activities    nursery     higher 
##          0          0          0          0          0          0          0 
##   internet   romantic     famrel   freetime      goout       Dalc       Walc 
##          0          0          0          0          0          0          0 
##     health   absences         G1         G2         G3 
##          0          0          0          0          0
# Number of missing values in each column of the Portuguese data.
nilaipor %>%
  is.na() %>%
  colSums()
##     school        sex        age    address    famsize    Pstatus       Medu 
##          0          0          0          0          0          0          0 
##       Fedu       Mjob       Fjob     reason   guardian traveltime  studytime 
##          0          0          0          0          0          0          0 
##   failures  schoolsup     famsup       paid activities    nursery     higher 
##          0          0          0          0          0          0          0 
##   internet   romantic     famrel   freetime      goout       Dalc       Walc 
##          0          0          0          0          0          0          0 
##     health   absences         G1         G2         G3 
##          0          0          0          0          0

Cek Duplikat

# Number of rows that exactly repeat an earlier row, per dataset.
nilaimat %>%
  duplicated() %>%
  sum()
## [1] 0
nilaipor %>%
  duplicated() %>%
  sum()
## [1] 0

Menghapus missing value dan duplikat, lalu mengecek ukuran data setelahnya

# Build the cleaned datasets: drop rows containing NA first, then drop
# exact duplicate rows.
mat_clean <- nilaimat %>%
  na.omit() %>%
  distinct()
por_clean <- nilaipor %>%
  na.omit() %>%
  distinct()

# Dimensions after cleaning.
dim(mat_clean)
## [1] 395  33
dim(por_clean)
## [1] 649  33

EDA

Top 10 siswa dengan nilai tertinggi pada tiap pelajaran

# Ten rows with the highest final grade (G3) in the Portuguese data:
# sort by G3 in descending order and keep the first ten rows.
top10por <- por_clean %>%
  arrange(desc(G3)) %>%
  slice_head(n = 10)
top10por
##    school sex age address famsize Pstatus Medu Fedu     Mjob     Fjob
## 1      GP   F  17       R     LE3       T    3    1 services    other
## 2      MS   M  18       U     GT3       T    4    4  teacher  teacher
## 3      GP   M  15       U     LE3       T    4    2  teacher    other
## 4      GP   F  16       U     GT3       T    4    2   health services
## 5      GP   M  16       U     GT3       T    1    0    other    other
## 6      GP   M  17       R     GT3       T    1    2  at_home  at_home
## 7      GP   F  18       R     LE3       T    1    1  at_home    other
## 8      GP   F  18       U     GT3       T    2    2  at_home  at_home
## 9      GP   F  17       U     GT3       T    4    3   health services
## 10     GP   F  17       U     GT3       T    3    2   health   health
##        reason guardian traveltime studytime failures schoolsup famsup paid
## 1  reputation   mother          2         4        0        no    yes   no
## 2        home   father          1         2        0        no     no   no
## 3      course   mother          1         1        0        no     no   no
## 4        home   father          1         2        0        no     no   no
## 5  reputation   mother          2         2        0        no    yes   no
## 6        home   mother          1         2        0        no    yes   no
## 7  reputation   mother          2         4        0        no    yes   no
## 8       other   mother          1         3        0        no    yes   no
## 9  reputation   mother          1         3        0        no    yes   no
## 10 reputation   father          1         4        0        no    yes   no
##    activities nursery higher internet romantic famrel freetime goout Dalc Walc
## 1          no     yes    yes       no       no      3        1     2    1    1
## 2         yes      no    yes      yes       no      3        2     4    1    4
## 3          no     yes    yes      yes       no      3        5     2    1    1
## 4          no     yes    yes      yes      yes      4        2     3    1    1
## 5         yes     yes    yes      yes      yes      4        3     2    1    1
## 6         yes      no    yes       no      yes      3        5     2    2    2
## 7         yes     yes    yes       no       no      5        2     2    1    1
## 8          no     yes    yes      yes       no      4        3     3    1    2
## 9          no     yes    yes      yes       no      4        2     2    1    2
## 10        yes      no    yes      yes       no      5        2     2    1    2
##    health absences G1 G2 G3
## 1       3        0 18 19 19
## 2       2        4 17 18 19
## 3       3       10 18 17 18
## 4       3        0 17 17 18
## 5       3        0 16 17 18
## 6       1        2 16 17 18
## 7       3        2 17 17 18
## 8       2        0 18 18 18
## 9       3        0 17 18 18
## 10      5        0 18 18 18
# Ten rows with the highest final grade (G3) in the mathematics data:
# sort by G3 in descending order and keep the first ten rows.
top10mat <- mat_clean %>%
  arrange(desc(G3)) %>%
  slice_head(n = 10)
top10mat
##    school sex age address famsize Pstatus Medu Fedu     Mjob     Fjob
## 1      GP   M  16       U     GT3       T    4    3   health services
## 2      GP   M  15       U     LE3       A    3    2 services    other
## 3      GP   M  15       U     LE3       A    4    4  teacher  teacher
## 4      GP   M  15       U     LE3       T    4    2  teacher    other
## 5      GP   F  18       U     GT3       T    2    2  at_home  at_home
## 6      MS   F  18       R     LE3       T    4    4    other    other
## 7      GP   M  15       U     LE3       T    4    3  teacher services
## 8      GP   M  15       U     GT3       T    4    4 services  teacher
## 9      GP   F  15       U     GT3       T    4    3 services    other
## 10     GP   M  15       U     GT3       A    3    4 services    other
##        reason guardian traveltime studytime failures schoolsup famsup paid
## 1  reputation   mother          1         4        0        no     no   no
## 2        home   mother          1         2        0        no    yes  yes
## 3      course   mother          1         1        0        no     no   no
## 4      course   mother          1         1        0        no     no   no
## 5       other   mother          1         3        0        no    yes  yes
## 6  reputation   mother          2         3        0        no     no   no
## 7        home   mother          1         3        0        no    yes   no
## 8      course   father          1         2        0        no    yes   no
## 9  reputation   mother          1         1        0        no     no  yes
## 10     course   mother          1         2        0        no    yes  yes
##    activities nursery higher internet romantic famrel freetime goout Dalc Walc
## 1         yes     yes    yes      yes       no      4        2     2    1    1
## 2          no     yes    yes      yes       no      4        2     2    1    1
## 3         yes     yes    yes      yes       no      5        5     3    1    1
## 4          no     yes    yes      yes       no      3        5     2    1    1
## 5          no     yes    yes      yes       no      4        3     3    1    2
## 6          no     yes    yes      yes       no      5        4     4    1    1
## 7         yes     yes    yes      yes       no      5        4     3    1    1
## 8         yes     yes    yes      yes       no      4        3     3    1    1
## 9         yes     yes    yes      yes       no      4        5     5    1    3
## 10        yes     yes    yes      yes       no      5        4     4    1    1
##    health absences G1 G2 G3
## 1       2        4 19 19 20
## 2       1        0 16 18 19
## 3       4        6 18 19 19
## 4       3       10 18 19 19
## 5       2        5 18 18 19
## 6       1        0 19 18 19
## 7       4        2 15 16 18
## 8       5        2 19 18 18
## 9       1        4 16 17 18
## 10      1        0 16 18 18
# Bar chart of the ten highest final grades (G3) in mathematics.
# Each bar is placed by its row position in `top10mat`, which is sorted by
# descending G3, so position 1 holds the highest grade.
# seq_len() is used instead of 1:nrow(): for an empty table 1:nrow() would
# produce c(1, 0), which does not match the number of rows being plotted.
ggplot(top10mat, aes(x = factor(seq_len(nrow(top10mat))), y = G3)) +
  geom_col(fill = "red") +
  labs(
    title = "10 Nilai Tertinggi Mat (G3)",
    x = "Ranking",
    y = "Nilai"
  ) +
  theme_minimal()

# Bar chart of the ten highest final grades (G3) in Portuguese.
# Each bar is placed by its row position in `top10por`, which is sorted by
# descending G3, so position 1 holds the highest grade.
# seq_len() is used instead of 1:nrow(): for an empty table 1:nrow() would
# produce c(1, 0), which does not match the number of rows being plotted.
ggplot(top10por, aes(x = factor(seq_len(nrow(top10por))), y = G3)) +
  geom_col(fill = "blue") +
  labs(
    title = "10 Nilai Tertinggi Bahasa Portugis (G3)",
    x = "Ranking",
    y = "Nilai"
  ) +
  theme_minimal()

Distribusi nilai akhir G3

# Histogram of the final grade (G3) in the Portuguese data, one bin per
# grade point.
ggplot(data = por_clean, mapping = aes(x = G3)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Distribusi Nilai Akhir (G3) - Portugis",
    x = "Nilai",
    y = "Frekuensi"
  )

# Histogram of the final grade (G3) in the mathematics data, one bin per
# grade point.
ggplot(data = mat_clean, mapping = aes(x = G3)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Distribusi Nilai Akhir (G3) - Matematika",
    x = "Nilai",
    y = "Frekuensi"
  )

Hubungan jam belajar dengan nilai akhir

# Study time versus final grade (G3) in the Portuguese data, with a linear
# fit.
# studytime and G3 appear as whole numbers in the data preview, so many
# students share the same (studytime, G3) position. geom_count() sizes each
# point by the number of observations there instead of stacking them into a
# single dot as geom_point() would.
# The smoothing formula is spelled out so ggplot2 does not print its
# default-formula message; the fitted line is still computed from every row.
ggplot(por_clean, aes(x = studytime, y = G3)) +
  geom_count() +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(title = "Jam Belajar dengan Nilai Akhir (Portugis)")

# Study time versus final grade (G3) in the mathematics data, with a linear
# fit.
# studytime and G3 appear as whole numbers in the data preview, so many
# students share the same (studytime, G3) position. geom_count() sizes each
# point by the number of observations there instead of stacking them into a
# single dot as geom_point() would.
# The smoothing formula is spelled out so ggplot2 does not print its
# default-formula message; the fitted line is still computed from every row.
ggplot(mat_clean, aes(x = studytime, y = G3)) +
  geom_count() +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(title = "Jam Belajar dengan Nilai Akhir (Matematika)")

Perbandingan nilai berdasarkan gender

# Box plot of the final grade (G3) for each value of `sex`, Portuguese data.
ggplot(data = por_clean, mapping = aes(x = sex, y = G3)) +
  geom_boxplot() +
  labs(
    title = "Nilai Akhir berdasarkan Gender (Portugis)"
  )

# Box plot of the final grade (G3) for each value of `sex`, mathematics data.
ggplot(data = mat_clean, mapping = aes(x = sex, y = G3)) +
  geom_boxplot() +
  labs(
    title = "Nilai Akhir berdasarkan Gender (Matematika)"
  )