==============================================================================

STUDY CASE WEEK 4

==============================================================================

Nama : Bagoes Nugroho Dwi Ariyanto

NRP : 5003251017

Kelas : D

==============================================================================

Import data dan library

library(ggplot2)
# Load the student performance records from the working directory and
# preview the first six rows to check how the columns were parsed.
df <- read.csv(file = "StudentsPerformance.csv")
head(df, n = 6L)
##   gender race.ethnicity parental.level.of.education        lunch
## 1 female        group B           bachelor's degree     standard
## 2 female        group C                some college     standard
## 3 female        group B             master's degree     standard
## 4   male        group A          associate's degree free/reduced
## 5   male        group C                some college     standard
## 6 female        group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score
## 1                    none         72            72            74
## 2               completed         69            90            88
## 3                    none         90            95            93
## 4                    none         47            57            44
## 5                    none         76            78            75
## 6                    none         71            83            78

Membuat variabel passed

# Derive a binary pass flag: a student passes (1) when the mean of the
# three exam scores is at least 50, and fails (0) otherwise.
score_cols <- c("math.score", "reading.score", "writing.score")
avg_score <- rowMeans(df[, score_cols])
df[["passed"]] <- ifelse(avg_score < 50, 0, 1)
head(df, n = 6L)
##   gender race.ethnicity parental.level.of.education        lunch
## 1 female        group B           bachelor's degree     standard
## 2 female        group C                some college     standard
## 3 female        group B             master's degree     standard
## 4   male        group A          associate's degree free/reduced
## 5   male        group C                some college     standard
## 6 female        group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score passed
## 1                    none         72            72            74      1
## 2               completed         69            90            88      1
## 3                    none         90            95            93      1
## 4                    none         47            57            44      0
## 5                    none         76            78            75      1
## 6                    none         71            83            78      1

Satu Variabel

Histogram Plot Math Score

# Histogram of math scores in 10-point bins.
# boundary = 0 pins the bin edges to multiples of 10 (0, 10, 20, ...).
# Without it ggplot2 centres the bins on multiples of the bin width
# (-5..5, 5..15, ..., 95..105), so bars straddle the decade marks and the
# axis extends past the score range (assumes scores run from 0 to 100).
ggplot(df, aes(x = math.score)) +
  geom_histogram(
    binwidth = 10,
    boundary = 0,
    fill = "blue",
    color = "black"
  ) +
  labs(
    title = "Distribusi Math Score",
    x = "Math Score",
    y = "Frekuensi"
  ) +
  theme_minimal()

Box Plot Reading Score

# Horizontal box plot of the reading scores: the variable is mapped to
# the x aesthetic, so the box is drawn along the x-axis.
ggplot(data = df, mapping = aes(x = reading.score)) +
  geom_boxplot() +
  labs(title = "Plot Reading Score", x = "Reading Score") +
  theme_minimal()

Density Plot Writing Score

# Kernel density estimate of the writing scores, filled with a
# semi-transparent sky-blue area.
ggplot(data = df, mapping = aes(x = writing.score)) +
  geom_density(fill = "skyblue", alpha = 0.5) +
  labs(
    title = "Writing Score Distribution",
    x = "Writing Score"
  ) +
  theme_minimal()

Dua Variabel

Scatter Plot Reading vs Writing Score

# Scatter plot relating each student's reading score (x) to their
# writing score (y), one blue point per row of df.
ggplot(data = df, mapping = aes(x = reading.score, y = writing.score)) +
  geom_point(color = "blue") +
  labs(title = "Reading Score vs Writing Score",
       x = "Reading Score",
       y = "Writing Score") +
  theme_minimal()

Tiga Variabel

Correlation Heatmap

library(corrplot)
## corrplot 0.95 loaded
# Pairwise Pearson correlations between the three exam scores, drawn as a
# colour-coded heatmap of the upper triangle with each coefficient printed
# in black on its cell.
df_num <- df[c("math.score", "reading.score", "writing.score")]
corr_matrix <- cor(df_num, method = "pearson")
corrplot(
  corr_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "black"
)