R Markdown
# Check for the required packages and install any that are missing.
needed <- c("tidyverse", "plotrix", "gridExtra", "ggpubr", "psych")
# Look each package up individually with system.file(), which returns "" when
# the package is not installed. The installed.packages() help page advises
# against using it to test for named packages because it scans every library.
to_install <- needed[
  !vapply(needed, function(pkg) nzchar(system.file(package = pkg)), logical(1))
]
if (length(to_install) > 0) {
  install.packages(to_install)
}
# Load library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotrix)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(ggpubr)
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:plotrix':
##
## rescale
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Global knitr chunk defaults: echo the code, hide messages and warnings.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# Default location of the StudentsPerformance CSV file.
file_path <- "D:/UINSA Ngajar/Ganjil 2022_2023/StatMat _ UINSA/StudentsPerformance.csv"
if (!file.exists(file_path)) {
  # file.choose() is an interactive prompt, so only fall back to it when a
  # user is present; in a non-interactive run (e.g. knitting) stop with a
  # clear error instead.
  if (!interactive()) {
    stop("File tidak ditemukan: ", file_path, call. = FALSE)
  }
  message("File tidak ditemukan di path. Silakan pilih secara manual.")
  file_path <- file.choose()
}
# Read the data, treating empty strings and the literal "NA" as missing.
df <- read.csv(file_path, stringsAsFactors = FALSE, na.strings = c("", "NA"))
# Preview the first rows.
head(df)
## gender race.ethnicity parental.level.of.education lunch
## 1 female group B bachelor's degree standard
## 2 female group C some college standard
## 3 female group B master's degree standard
## 4 male group A associate's degree free/reduced
## 5 male group C some college standard
## 6 female group B associate's degree standard
## test.preparation.course math.score reading.score writing.score
## 1 none 72 72 74
## 2 completed 69 90 88
## 3 none 90 95 93
## 4 none 47 57 44
## 5 none 76 78 75
## 6 none 71 83 78
Bersihkan Nama Kolom
# Normalise the column names: lower-case them, turn each run of whitespace
# into a dot, then collapse repeated dots into a single dot.
names(df) <- str_replace_all(
  str_replace_all(tolower(names(df)), "\\s+", "."),
  "\\.+",
  "."
)
print(names(df))
## [1] "gender" "race.ethnicity"
## [3] "parental.level.of.education" "lunch"
## [5] "test.preparation.course" "math.score"
## [7] "reading.score" "writing.score"
# Columns the rest of the analysis relies on.
expected <- c(
  "gender",
  "race.ethnicity",
  "parental.level.of.education",
  "lunch",
  "test.preparation.course",
  "math.score",
  "reading.score",
  "writing.score"
)
# Expected columns that are absent from the data; warn but carry on.
missing <- setdiff(expected, names(df))
if (length(missing) > 0) {
  warning("Beberapa kolom tidak ditemukan: ", paste(missing, collapse = ", "))
}
Pastikan Skor Numerik
# Coerce the three score columns to numeric in a single pass.
df <- df %>%
  mutate(
    across(
      all_of(c("math.score", "reading.score", "writing.score")),
      as.numeric
    )
  )
# Count the missing values in each score column after coercion.
vapply(
  df[c("math.score", "reading.score", "writing.score")],
  function(x) sum(is.na(x)),
  integer(1)
)
## math.score reading.score writing.score
## 0 0 0
Tambahkan Total & Rata-rata Skor
# Derive each row's combined score and the mean of the three subject scores.
df <- df %>%
  mutate(
    total.score = math.score + reading.score + writing.score,
    avg.score = total.score / 3
  )
# Preview the subject scores next to the two derived columns.
df %>%
  select(math.score, reading.score, writing.score, total.score, avg.score) %>%
  head()
## math.score reading.score writing.score total.score avg.score
## 1 72 72 74 218 72.66667
## 2 69 90 88 247 82.33333
## 3 90 95 93 278 92.66667
## 4 47 57 44 148 49.33333
## 5 76 78 75 229 76.33333
## 6 71 83 78 232 77.33333
RINGKASAN DESKRIPTIF DATA
# Summary statistics for the two derived score columns.
df %>%
  select(total.score, avg.score) %>%
  summary()
## total.score avg.score
## Min. : 27.0 Min. : 9.00
## 1st Qu.:175.0 1st Qu.: 58.33
## Median :205.0 Median : 68.33
## Mean :203.3 Mean : 67.77
## 3rd Qu.:233.0 3rd Qu.: 77.67
## Max. :300.0 Max. :100.00
PIE CHART
library(plotrix)
# Helper: draw a 3D pie chart of one categorical column, labelling every
# sector with its category and percentage share.
#
# Arguments:
#   data  - data frame holding the column to summarise.
#   kolom - name of the column to count, given as a single string.
#   judul - chart title, passed to pie3D() as `main`.
#
# Returns whatever pie3D() returns; the chart itself is drawn on the active
# graphics device. Stops with an error when `kolom` is not a single column
# name present in `data`, or when `data` has no rows.
buat_pie3d <- function(data, kolom, judul) {
  stopifnot(is.data.frame(data), is.character(kolom), length(kolom) == 1L)
  if (!(kolom %in% names(data))) {
    stop("Kolom tidak ditemukan: ", kolom, call. = FALSE)
  }
  if (nrow(data) == 0L) {
    stop("Data kosong; pie chart tidak dapat dibuat.", call. = FALSE)
  }

  # One row per category with its count, percentage share and sector label.
  df_prop <- data %>%
    count(!!sym(kolom)) %>%
    mutate(
      persen = n / sum(n) * 100,
      label = paste0(.data[[kolom]], " (", round(persen, 1), "%)")
    )

  pie3D(
    df_prop$n,
    labels = df_prop$label,
    explode = 0.1, # pull the sectors slightly apart so each one is distinct
    col = rainbow(nrow(df_prop)),
    main = judul,
    labelcex = 0.8
  )
}
# 3D pie chart of the gender column
buat_pie3d(df, "gender", "Distribusi Gender (3D Pie Chart)")

# 3D pie chart of the race.ethnicity column
buat_pie3d(df, "race.ethnicity", "Distribusi Race/Ethnicity (3D Pie Chart)")

# 3D pie chart of the parental.level.of.education column
buat_pie3d(df, "parental.level.of.education", "Distribusi Pendidikan Orang Tua (3D Pie Chart)")
