R Markdown

# Check for the required packages and install any that are missing.
needed <- c("tidyverse", "plotrix", "gridExtra", "ggpubr", "psych")
# system.file() returns "" for a package that is not installed. It probes one
# package at a time, whereas installed.packages() scans every library and is
# documented as unsuitable for testing whether a named package is present.
pkg_dirs <- vapply(
  needed,
  function(pkg) system.file(package = pkg),
  character(1)
)
to_install <- needed[!nzchar(pkg_dirs)]
if (length(to_install) > 0) {
  install.packages(to_install)
}

# Load library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotrix)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggpubr)
library(psych)
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:plotrix':
## 
##     rescale
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Global knitr chunk options: echo the code, hide messages and warnings.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# Default location of the dataset on disk.
file_path <- "D:/UINSA Ngajar/Ganjil 2022_2023/StatMat _ UINSA/StudentsPerformance.csv"

# When nothing exists at that path, ask the user to pick the file by hand.
if (!file.exists(file_path)) {
  message("File tidak ditemukan di path. Silakan pilih secara manual.")
  file_path <- file.choose()
}

# Read the CSV: text stays character, and both "" and "NA" count as missing.
df <- read.csv(
  file = file_path,
  na.strings = c("", "NA"),
  stringsAsFactors = FALSE
)
head(df)
##   gender race.ethnicity parental.level.of.education        lunch
## 1 female        group B           bachelor's degree     standard
## 2 female        group C                some college     standard
## 3 female        group B             master's degree     standard
## 4   male        group A          associate's degree free/reduced
## 5   male        group C                some college     standard
## 6 female        group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score
## 1                    none         72            72            74
## 2               completed         69            90            88
## 3                    none         90            95            93
## 4                    none         47            57            44
## 5                    none         76            78            75
## 6                    none         71            83            78

Bersihkan Nama Kolom

# Normalise the column names: lower-case them, turn each run of whitespace
# into a dot, then collapse any run of dots into a single dot.
names(df) <- str_replace_all(
  str_replace_all(tolower(names(df)), "\\s+", "."),
  "\\.+",
  "."
)

print(names(df))
## [1] "gender"                      "race.ethnicity"             
## [3] "parental.level.of.education" "lunch"                      
## [5] "test.preparation.course"     "math.score"                 
## [7] "reading.score"               "writing.score"
# Columns that should be present in df after the names are cleaned.
expected <- c(
  "gender",
  "race.ethnicity",
  "parental.level.of.education",
  "lunch",
  "test.preparation.course",
  "math.score",
  "reading.score",
  "writing.score"
)
# Warn, without stopping, about every expected column that df lacks.
missing <- setdiff(expected, names(df))
if (length(missing) > 0) {
  warning("Beberapa kolom tidak ditemukan: ", paste(missing, collapse = ", "))
}

Pastikan Skor Numeric

# Coerce the three score columns to numeric, in place.
df <- df %>%
  mutate(
    across(
      all_of(c("math.score", "reading.score", "writing.score")),
      as.numeric
    )
  )

# Count the NA values in each score column.
vapply(
  df[c("math.score", "reading.score", "writing.score")],
  function(x) sum(is.na(x)),
  integer(1)
)
##    math.score reading.score writing.score 
##             0             0             0

Tambahkan Total & Rata-rata Skor

# Add the summed score first, then derive the three-subject mean from it.
df <- df %>%
  mutate(total.score = math.score + reading.score + writing.score) %>%
  mutate(avg.score = total.score / 3)

# Preview the three scores next to the two derived columns.
head(
  df[c(
    "math.score",
    "reading.score",
    "writing.score",
    "total.score",
    "avg.score"
  )]
)
##   math.score reading.score writing.score total.score avg.score
## 1         72            72            74         218  72.66667
## 2         69            90            88         247  82.33333
## 3         90            95            93         278  92.66667
## 4         47            57            44         148  49.33333
## 5         76            78            75         229  76.33333
## 6         71            83            78         232  77.33333

SUMMARY DESKRIPSI DATA

# Five-number summary plus mean for the two derived score columns.
summary(df[c("total.score", "avg.score")])
##   total.score      avg.score     
##  Min.   : 27.0   Min.   :  9.00  
##  1st Qu.:175.0   1st Qu.: 58.33  
##  Median :205.0   Median : 68.33  
##  Mean   :203.3   Mean   : 67.77  
##  3rd Qu.:233.0   3rd Qu.: 77.67  
##  Max.   :300.0   Max.   :100.00

PIE CHART

library(plotrix)

# Helper that draws a 3D pie chart of one column, labelled with percentages.
#
# Arguments:
#   data  - data frame containing the column to summarise.
#   kolom - name of that column, given as a single string.
#   judul - chart title, passed to pie3D() as `main`.
#
# Returns the value of pie3D() invisibly; the function is called for the plot
# it draws.
buat_pie3d <- function(data, kolom, judul) {
  # Fail early with a readable message rather than a tidy-eval lookup error.
  stopifnot(is.character(kolom), length(kolom) == 1L)
  if (!(kolom %in% names(data))) {
    stop("Kolom tidak ditemukan: ", kolom, call. = FALSE)
  }

  # Explicit output names keep the summary independent of the input column's
  # name, including the case where that column is itself called "n".
  df_prop <- data %>%
    count(kategori = .data[[kolom]], name = "jumlah") %>%
    mutate(
      persen = jumlah / sum(jumlah) * 100,
      label = paste0(kategori, " (", round(persen, 1), "%)")
    )

  invisible(
    pie3D(
      df_prop$jumlah,
      labels = df_prop$label,
      explode = 0.1, # pull the sectors slightly apart so each one is distinct
      col = rainbow(nrow(df_prop)),
      main = judul,
      labelcex = 0.8
    )
  )
}

# 3D pie chart of the gender column
buat_pie3d(
  data = df,
  kolom = "gender",
  judul = "Distribusi Gender (3D Pie Chart)"
)

# 3D pie chart of the race.ethnicity column
buat_pie3d(
  data = df,
  kolom = "race.ethnicity",
  judul = "Distribusi Race/Ethnicity (3D Pie Chart)"
)

# 3D pie chart of the parental.level.of.education column
buat_pie3d(
  data = df,
  kolom = "parental.level.of.education",
  judul = "Distribusi Pendidikan Orang Tua (3D Pie Chart)"
)