# Load the student performance dataset from the working directory.
df <- read.csv(file = "StudentPerformanceFactors.csv")

# Preview the first six rows to check that the columns were parsed as expected.
head(x = df, n = 6L)
##   Hours_Studied Attendance Parental_Involvement Access_to_Resources
## 1            23         84                  Low                High
## 2            19         64                  Low              Medium
## 3            24         98               Medium              Medium
## 4            29         89                  Low              Medium
## 5            19         92               Medium              Medium
## 6            19         88               Medium              Medium
##   Extracurricular_Activities Sleep_Hours Previous_Scores Motivation_Level
## 1                         No           7              73              Low
## 2                         No           8              59              Low
## 3                        Yes           7              91           Medium
## 4                        Yes           8              98           Medium
## 5                        Yes           6              65           Medium
## 6                        Yes           8              89           Medium
##   Internet_Access Tutoring_Sessions Family_Income Teacher_Quality School_Type
## 1             Yes                 0           Low          Medium      Public
## 2             Yes                 2        Medium          Medium      Public
## 3             Yes                 2        Medium          Medium      Public
## 4             Yes                 1        Medium          Medium      Public
## 5             Yes                 3        Medium            High      Public
## 6             Yes                 3        Medium          Medium      Public
##   Peer_Influence Physical_Activity Learning_Disabilities
## 1       Positive                 3                    No
## 2       Negative                 4                    No
## 3        Neutral                 4                    No
## 4       Negative                 4                    No
## 5        Neutral                 4                    No
## 6       Positive                 3                    No
##   Parental_Education_Level Distance_from_Home Gender Exam_Score
## 1              High School               Near   Male         67
## 2                  College           Moderate Female         61
## 3             Postgraduate               Near   Male         74
## 4              High School           Moderate   Male         71
## 5                  College               Near Female         70
## 6             Postgraduate               Near   Male         71
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Histogram of exam scores: 20 bins, filled in steel blue.
ggplot(data = df, mapping = aes(x = Exam_Score)) +
  geom_histogram(fill = "steelblue", bins = 20) +
  labs(
    title = "Distribusi Nilai Ujian",
    x = "Nilai Ujian",
    y = "Frekuensi"
  ) +
  theme_minimal()

# Scatter plot of study hours against exam score with a straight-line
# (method = "lm") fit drawn on top, without a confidence band.
ggplot(data = df, mapping = aes(x = Hours_Studied, y = Exam_Score)) +
  # Points: soft purple, semi-transparent.
  geom_point(size = 3, alpha = 0.7, color = "#6A5ACD") +
  # Fitted line: bright orange.
  geom_smooth(
    method = "lm",
    se = FALSE,
    linewidth = 1.2,
    color = "#FF4500"
  ) +
  labs(
    title = "Korelasi Jam Belajar vs Nilai Ujian",
    x = "Jam Belajar",
    y = "Nilai Ujian"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Bar chart of the mean exam score for each motivation level.
#
# Motivation_Level is an ordinal category, so its levels are put in their
# natural order (Low, Medium, High) rather than the alphabetical default
# (High, Low, Medium). Any other value present in the column is appended after
# the known levels, so unexpected categories are still plotted, not dropped.
df %>%
  mutate(
    Motivation_Level = factor(
      Motivation_Level,
      levels = union(c("Low", "Medium", "High"), unique(Motivation_Level))
    )
  ) %>%
  group_by(Motivation_Level) %>%
  # na.rm = TRUE: a missing score should not turn a whole group's mean into NA.
  summarise(MeanScore = mean(Exam_Score, na.rm = TRUE)) %>%
  ggplot(aes(x = Motivation_Level, y = MeanScore, fill = Motivation_Level)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Rata-rata Nilai Berdasarkan Motivation Level",
    x = "Motivation Level",
    y = "Rata-rata Nilai"
  )

# Box plots of study hours, one box per gender, each filled with its own colour.
ggplot(
  data = df,
  mapping = aes(x = Gender, y = Hours_Studied, fill = Gender)
) +
  geom_boxplot() +
  labs(
    title = "Perbandingan Jam Belajar Berdasarkan Gender",
    x = "Gender",
    y = "Jam Belajar"
  ) +
  theme_minimal()