# Load the student exam-score dataset from CSV into a tibble.
# The file's first column has an empty header, so the reader renames it to
# `...1` (see the "New names" message in the output below).
data <- read_csv("Expanded_data_with_more_features.csv")
## New names:
## Rows: 30641 Columns: 15
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): Gender, EthnicGroup, ParentEduc, LunchType, TestPrep, ParentMarita... dbl
## (5): ...1, NrSiblings, MathScore, ReadingScore, WritingScore
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Preview the first rows to check the imported column names and types.
head(data)
## # A tibble: 6 × 15
## ...1 Gender EthnicGroup ParentEduc LunchType TestPrep ParentMaritalStatus
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 0 female <NA> bachelor's de… standard none married
## 2 1 female group C some college standard <NA> married
## 3 2 female group B master's degr… standard none single
## 4 3 male group A associate's d… free/red… none married
## 5 4 male group C some college standard none married
## 6 5 female group B associate's d… standard none married
## # ℹ 8 more variables: PracticeSport <chr>, IsFirstChild <chr>,
## # NrSiblings <dbl>, TransportMeans <chr>, WklyStudyHours <chr>,
## # MathScore <dbl>, ReadingScore <dbl>, WritingScore <dbl>
# Bar charts of the categorical student attributes ----
# Each chart counts the rows per category of a single column.

# Gender
ggplot(data = data, mapping = aes(x = Gender)) +
  geom_bar(fill = "skyblue") +
  ggtitle("Distribusi Gender") +
  xlab("Gender") +
  ylab("Jumlah")

# Ethnic group
ggplot(data = data, mapping = aes(x = EthnicGroup)) +
  geom_bar(fill = "orange") +
  ggtitle("Distribusi Kelompok Etnis") +
  xlab("Kelompok Etnis") +
  ylab("Jumlah")

# Parents' education; x labels are rotated 45 degrees so they do not overlap
ggplot(data = data, mapping = aes(x = ParentEduc)) +
  geom_bar(fill = "purple") +
  ggtitle("Latar Belakang Pendidikan Orang Tua") +
  xlab("Pendidikan") +
  ylab("Jumlah") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Lunch type
ggplot(data = data, mapping = aes(x = LunchType)) +
  geom_bar(fill = "green") +
  ggtitle("Jenis Makan Siang") +
  xlab("Tipe") +
  ylab("Jumlah")

# Test preparation
ggplot(data = data, mapping = aes(x = TestPrep)) +
  geom_bar(fill = "coral") +
  ggtitle("Persiapan Ujian") +
  xlab("Status") +
  ylab("Jumlah")

# Parents' marital status
ggplot(data = data, mapping = aes(x = ParentMaritalStatus)) +
  geom_bar(fill = "steelblue") +
  ggtitle("Status Perkawinan Orang Tua") +
  xlab("Status") +
  ylab("Jumlah")

# Sport practice frequency
ggplot(data = data, mapping = aes(x = PracticeSport)) +
  geom_bar(fill = "brown") +
  ggtitle("Frekuensi Berolahraga") +
  xlab("Frekuensi") +
  ylab("Jumlah")

# First-child indicator
ggplot(data = data, mapping = aes(x = IsFirstChild)) +
  geom_bar(fill = "darkcyan") +
  ggtitle("Apakah Anak Pertama") +
  xlab("Ya/Tidak") +
  ylab("Jumlah")
# Number of siblings per student.
# NrSiblings contains missing values; na.rm = TRUE drops them explicitly so
# the count layer no longer emits the "Removed ... rows containing non-finite"
# warning that the default (na.rm = FALSE) produced.
ggplot(data, aes(x = NrSiblings)) +
  geom_bar(fill = "darkorange", na.rm = TRUE) +
  labs(
    title = "Jumlah Saudara Kandung",
    x = "Jumlah Saudara",
    y = "Jumlah Siswa"
  )
# Means of transport to school
ggplot(data = data, mapping = aes(x = TransportMeans)) +
  geom_bar(fill = "violet") +
  ggtitle("Sarana Transportasi ke Sekolah") +
  xlab("Sarana") +
  ylab("Jumlah")

# Weekly self-study hours (categorical bins)
ggplot(data = data, mapping = aes(x = WklyStudyHours)) +
  geom_bar(fill = "darkgreen") +
  ggtitle("Jam Belajar Mandiri per Minggu") +
  xlab("Kategori Jam") +
  ylab("Jumlah")
# Score histograms with 5-point-wide bins ----

# Math score
ggplot(data = data, mapping = aes(x = MathScore)) +
  geom_histogram(binwidth = 5, fill = "dodgerblue") +
  ggtitle("Distribusi Nilai Matematika") +
  xlab("Nilai") +
  ylab("Frekuensi")

# Reading score
ggplot(data = data, mapping = aes(x = ReadingScore)) +
  geom_histogram(binwidth = 5, fill = "salmon") +
  ggtitle("Distribusi Nilai Membaca") +
  xlab("Nilai") +
  ylab("Frekuensi")
# Base-graphics histograms of the three scores, side by side in one row.
# par() returns the previous settings; they are restored afterwards so the
# 1x3 layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 3))
hist(
  data$MathScore,
  main = "Distribusi Nilai Math",
  col = "skyblue",
  xlab = "Math Score"
)
hist(
  data$ReadingScore,
  main = "Distribusi Nilai Reading",
  col = "salmon",
  xlab = "Reading Score"
)
hist(
  data$WritingScore,
  main = "Distribusi Nilai Writing",
  col = "seagreen",
  xlab = "Writing Score"
)
par(old_par)
# Base-graphics boxplots of the three scores, side by side in one row, used
# to spot outliers visually. The previous par() settings are restored after
# drawing so the 1x3 layout does not affect later base-graphics plots.
old_par <- par(mfrow = c(1, 3))
boxplot(data$MathScore, main = "Boxplot Math", col = "skyblue")
boxplot(data$ReadingScore, main = "Boxplot Reading", col = "salmon")
boxplot(data$WritingScore, main = "Boxplot Writing", col = "seagreen")
par(old_par)
# Print three short notes about outliers (definition, detection, handling).
# writeLines() emits one note per line; back-to-back cat() calls without a
# trailing newline would run the three sentences together.
writeLines(c(
  "Outlier adalah nilai yang jauh berbeda dari mayoritas data lainnya.",
  "Cara memeriksa: gunakan boxplot atau rumus IQR (Q3 - Q1).",
  "Menanggulangi outlier: hapus, transformasi data, atau gunakan metode robust."
))
# Number of students per weekly self-study-hours category.
ggplot(data, aes(x = WklyStudyHours)) +
  geom_bar(fill = "darkorange") +
  labs(
    title = "Distribusi Jam Belajar Mandiri",
    x = "Kategori Jam Belajar",
    y = "Jumlah Siswa"
  )
# Reshape the three score columns to long format: one row per student and
# subject, with the subject name in `Subject` and the value in `Score`.
data_long <- data %>%
  pivot_longer(
    cols = c(MathScore, ReadingScore, WritingScore),
    names_to = "Subject",
    values_to = "Score"
  )
# Boxplots of score by ethnic group, one box per subject within each group.
# x labels are rotated 45 degrees so they do not overlap.
ggplot(data_long, aes(x = EthnicGroup, y = Score, fill = Subject)) +
  geom_boxplot() +
  labs(
    title = "Perbandingan Nilai per Etnis",
    x = "Kelompok Etnis",
    y = "Nilai"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Correlation matrix of the three scores, computed on rows where all three
# are present, then drawn as a colored grid with the coefficients overlaid.
nilai <- data[, c("MathScore", "ReadingScore", "WritingScore")]
corr_matrix <- cor(na.omit(nilai))
corrplot(
  corr_matrix,
  method = "color",
  addCoef.col = "black",
  tl.cex = 0.8
)
# Boxplot of math score by sport-practice category, colored per category.
ggboxplot(
  data,
  x = "PracticeSport",
  y = "MathScore",
  color = "PracticeSport",
  palette = "jco",
  ylab = "Math Score",
  xlab = "Olahraga"
)
# Test whether mean math score differs between sport-practice groups.
# The formula interface of t.test() only accepts a grouping variable with
# exactly two levels and errors otherwise. With exactly two groups the
# original Welch t-test is run; with any other number of groups the Welch
# one-way test (its multi-group generalization) is used instead.
# NOTE(review): PracticeSport looks like a frequency variable with more than
# two categories - confirm against the data.
n_sport_groups <- length(unique(na.omit(data$PracticeSport)))
if (n_sport_groups == 2L) {
  sport_test <- t.test(MathScore ~ PracticeSport, data = data)
} else {
  sport_test <- oneway.test(MathScore ~ PracticeSport, data = data)
}
print(sport_test)