==============================================================================
STUDY CASE WEEK 4
==============================================================================
Nama : Bagoes Nugroho Dwi Ariyanto
NRP : 5003251017
Kelas : D
==============================================================================
Import data dan library
library(ggplot2)
# Read the student performance data set (path is relative to the working
# directory) and preview its first six rows.
df <- read.csv(file = "StudentsPerformance.csv", header = TRUE)
head(df, n = 6L)
## gender race.ethnicity parental.level.of.education lunch
## 1 female group B bachelor's degree standard
## 2 female group C some college standard
## 3 female group B master's degree standard
## 4 male group A associate's degree free/reduced
## 5 male group C some college standard
## 6 female group B associate's degree standard
## test.preparation.course math.score reading.score writing.score
## 1 none 72 72 74
## 2 completed 69 90 88
## 3 none 90 95 93
## 4 none 47 57 44
## 5 none 76 78 75
## 6 none 71 83 78
Membuat variabel passed
# Per-student mean of the three exam scores.
avg_score <- rowMeans(df[c("math.score", "reading.score", "writing.score")])

# Flag each student: 0 when the mean is below 50, otherwise 1.
df[["passed"]] <- ifelse(avg_score < 50, 0, 1)

# Preview the data with the new `passed` column appended.
head(df, n = 6L)
## gender race.ethnicity parental.level.of.education lunch
## 1 female group B bachelor's degree standard
## 2 female group C some college standard
## 3 female group B master's degree standard
## 4 male group A associate's degree free/reduced
## 5 male group C some college standard
## 6 female group B associate's degree standard
## test.preparation.course math.score reading.score writing.score passed
## 1 none 72 72 74 1
## 2 completed 69 90 88 1
## 3 none 90 95 93 1
## 4 none 47 57 44 0
## 5 none 76 78 75 1
## 6 none 71 83 78 1
Satu Variabel
Histogram Plot Math Score
# Histogram of math scores in 10-point bins.
#
# `boundary = 0` anchors the bin edges on multiples of 10 (0-10, 10-20, ...).
# Without `boundary`/`center`, ggplot2 centres bins on multiples of the bin
# width, which puts the edges at 5, 15, ... and lets the outermost bins extend
# past the observed score range (assumes scores are on a 0-100 scale --
# TODO confirm against the data dictionary).
ggplot(df, aes(x = math.score)) +
  geom_histogram(
    binwidth = 10,
    boundary = 0,
    fill = "blue",
    color = "black"
  ) +
  labs(
    title = "Distribusi Math Score",
    x = "Math Score",
    y = "Frekuensi"
  ) +
  theme_minimal()

Box Plot Reading Score
# Box plot of reading scores; only the x aesthetic is mapped, so a single
# box summarises the whole column.
ggplot(data = df, mapping = aes(x = reading.score)) +
  geom_boxplot() +
  theme_minimal() +
  ggtitle("Plot Reading Score") +
  xlab("Reading Score")

Density Plot Writing Score
# Kernel density estimate of writing scores, drawn as a semi-transparent
# filled curve.
ggplot(data = df, mapping = aes(x = writing.score)) +
  geom_density(fill = "skyblue", alpha = 0.5) +
  theme_minimal() +
  ggtitle("Writing Score Distribution") +
  xlab("Writing Score")

Dua Variabel
Scatter Plot Reading vs Writing Score
# Scatter plot of writing score against reading score, one point per student.
ggplot(
  data = df,
  mapping = aes(x = reading.score, y = writing.score)
) +
  geom_point(colour = "blue") +
  theme_minimal() +
  ggtitle("Reading Score vs Writing Score") +
  xlab("Reading Score") +
  ylab("Writing Score")

Tiga Variabel
Correlation Heatmap
library(corrplot)
## corrplot 0.95 loaded
# Keep only the three numeric score columns.
df_num <- df[c("math.score", "reading.score", "writing.score")]

# Pairwise Pearson correlations between the score columns.
corr_matrix <- cor(x = df_num, method = "pearson")

# Draw the upper triangle as coloured tiles with the coefficients printed
# in black on top.
corrplot(
  corr = corr_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "black"
)
