Introduction Dataset Summary Relationships

Introduction

This report presents my data analysis, carried out in RStudio, of the exam results of twenty students. The dataset records each student's exam performance together with information about study habits and background, so that exam scores can be related to those factors. Data of this kind can be used in schools, colleges, and universities to assess whether students pass or fail an exam. The dataset has six columns: student_id, hours_studied, sleep_hours, attendance_percent, previous_scores, and exam_score.

Data source: Kaggle

library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.1.0
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the student performance CSV into a tibble.
# NOTE(review): the path is absolute and machine-specific, so the document
# will presumably only knit on the author's computer; consider a path
# relative to the project directory.
UTS_Mike_Zidane <- read_csv(
  file = "C:/Users/Michael Zidane/Downloads/UTS Mike Zidane.csv"
)
## Rows: 20 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): student_id
## dbl (5): hours_studied, sleep_hours, attendance_percent, previous_scores, ex...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style viewer only in an interactive session: View()
# is a GUI helper and has no useful effect (and can fail) when the document
# is rendered non-interactively.
if (interactive()) {
  View(UTS_Mike_Zidane)
}
# Print the tibble so the data appear in the rendered report.
print(UTS_Mike_Zidane)
## # A tibble: 20 × 6
##    student_id hours_studied sleep_hours attendance_percent previous_scores
##    <chr>              <dbl>       <dbl>              <dbl>           <dbl>
##  1 S001                 8           8.8               72.1              45
##  2 S002                 1.3         8.6               60.7              55
##  3 S003                 4           8.2               73.7              86
##  4 S004                 3.5         4.8               95.1              66
##  5 S005                 9.1         6.4               89.8              71
##  6 S006                 8.4         5.1               58.5              75
##  7 S007                10.8         6                 54.2              88
##  8 S008                 2           4.3               75.8              55
##  9 S009                 5.6         5.9               81.6              84
## 10 S010                 1.3         8.9               66.8              70
## 11 S011                 3.4         5.3               90.9              81
## 12 S012                 6.6         7.9               87.6              85
## 13 S013                 1.3         6.3               83.6              71
## 14 S014                 3.2         6.1               61.2              68
## 15 S015                 8.1         8.8               60                90
## 16 S016                 7           9                 51.2              41
## 17 S017                 3.4         6.8               62.2              45
## 18 S018                 7.5         7.6               73.8              58
## 19 S019                 9.9         4.8               92.5              54
## 20 S020                 1.1         5.5               53.6              65
## # ℹ 1 more variable: exam_score <dbl>

Summary of all data

# Per-column summary: five-number summary plus mean for the numeric columns,
# length/class/mode for the character id column.
summary(object = UTS_Mike_Zidane)
##   student_id        hours_studied     sleep_hours    attendance_percent
##  Length:20          Min.   : 1.100   Min.   :4.300   Min.   :51.20     
##  Class :character   1st Qu.: 2.900   1st Qu.:5.450   1st Qu.:60.52     
##  Mode  :character   Median : 4.800   Median :6.350   Median :72.90     
##                     Mean   : 5.275   Mean   :6.755   Mean   :72.25     
##                     3rd Qu.: 8.025   3rd Qu.:8.300   3rd Qu.:84.60     
##                     Max.   :10.800   Max.   :9.000   Max.   :95.10     
##  previous_scores   exam_score   
##  Min.   :41.00   Min.   :17.10  
##  1st Qu.:55.00   1st Qu.:29.20  
##  Median :69.00   Median :34.05  
##  Mean   :67.65   Mean   :31.77  
##  3rd Qu.:81.75   3rd Qu.:35.73  
##  Max.   :90.00   Max.   :41.10

Hours Studied

# Descriptive statistics for the hours_studied column.
summary(UTS_Mike_Zidane[["hours_studied"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.100   2.900   4.800   5.275   8.025  10.800
mean(UTS_Mike_Zidane[["hours_studied"]])
## [1] 5.275
median(UTS_Mike_Zidane[["hours_studied"]])
## [1] 4.8
# Mode: negate the frequency table so an ascending sort puts the most
# frequent value first, then take its name.
names(sort(-table(UTS_Mike_Zidane[["hours_studied"]])))[1]
## [1] "1.3"

Sleep Hours

# Descriptive statistics for the sleep_hours column.
summary(UTS_Mike_Zidane$sleep_hours)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.300   5.450   6.350   6.755   8.300   9.000
mean(UTS_Mike_Zidane$sleep_hours)
## [1] 6.755
median(UTS_Mike_Zidane$sleep_hours)
## [1] 6.35
# Mode: tabulate the sleep_hours column itself. Tabulating the whole data
# frame cross-classifies all six columns and yields an object without names,
# which is why this step previously returned NULL.
# 4.8 and 8.8 both occur twice; only the first tied value is reported.
names(sort(-table(UTS_Mike_Zidane$sleep_hours)))[1]
## [1] "4.8"

Attendance Percent

# Descriptive statistics for the attendance_percent column.
summary(UTS_Mike_Zidane$attendance_percent)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   51.20   60.52   72.90   72.25   84.60   95.10
mean(UTS_Mike_Zidane$attendance_percent)
## [1] 72.245
median(UTS_Mike_Zidane$attendance_percent)
## [1] 72.9
# Mode: tabulate the attendance_percent column itself (tabulating the whole
# data frame returned NULL). All 20 attendance values are distinct, so there
# is no meaningful mode; the expression simply returns the smallest value.
names(sort(-table(UTS_Mike_Zidane$attendance_percent)))[1]
## [1] "51.2"

Previous Score

# Descriptive statistics for the previous_scores column.
summary(UTS_Mike_Zidane$previous_scores)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   41.00   55.00   69.00   67.65   81.75   90.00
mean(UTS_Mike_Zidane$previous_scores)
## [1] 67.65
median(UTS_Mike_Zidane$previous_scores)
## [1] 69
# Mode: tabulate the previous_scores column itself (tabulating the whole
# data frame returned NULL). 45, 55 and 71 each occur twice; only the first
# tied value is reported.
names(sort(-table(UTS_Mike_Zidane$previous_scores)))[1]
## [1] "45"

Exam Score

# Descriptive statistics for the exam_score column.
summary(UTS_Mike_Zidane$exam_score)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.10   29.20   34.05   31.77   35.73   41.10
mean(UTS_Mike_Zidane$exam_score)
## [1] 31.775
# Median reported explicitly, matching the other variable sections.
median(UTS_Mike_Zidane$exam_score)
## [1] 34.05
# Mode: tabulate the exam_score column itself (tabulating the whole data
# frame returned NULL).
names(sort(-table(UTS_Mike_Zidane$exam_score)))[1]
## (re-knit the document to regenerate this output)

Data visualization

# Draw a boxplot of one numeric column with the raw observations jittered
# on top, so both the summary and the individual points are visible.
#
# Arguments:
#   data        data frame containing the column
#   column      name of the column to plot, as a string
#   title       plot title
#   y_label     y-axis label
#   fill        fill colour of the box
#   point_color colour of the jittered points
# Returns the ggplot object (auto-printed when called at top level).
plot_distribution <- function(data, column, title, y_label, fill,
                              point_color) {
  # x = "" gives a single unlabeled category; .data[[column]] looks the
  # column up by its string name.
  ggplot(data, aes(x = "", y = .data[[column]])) +
    geom_boxplot(fill = fill, alpha = 0.7) +
    geom_jitter(width = 0.1, color = point_color, alpha = 0.5) +
    labs(title = title, y = y_label, x = "") +
    theme_minimal()
}

# Boxplot for Hours Studied
plot_distribution(
  UTS_Mike_Zidane, "hours_studied",
  title = "Distribution of Hours Studied", y_label = "Hours Studied",
  fill = "skyblue", point_color = "darkblue"
)

# Boxplot for Sleep Hours
plot_distribution(
  UTS_Mike_Zidane, "sleep_hours",
  title = "Distribution of Sleep Hours", y_label = "Sleep Hours",
  fill = "lightgreen", point_color = "darkgreen"
)

# Boxplot for Attendance Percent
plot_distribution(
  UTS_Mike_Zidane, "attendance_percent",
  title = "Distribution of Attendance Percentage",
  y_label = "Attendance Percentage",
  fill = "lightcoral", point_color = "darkred"
)

# Boxplot for Exam Score
plot_distribution(
  UTS_Mike_Zidane, "exam_score",
  title = "Distribution of Exam Scores", y_label = "Exam Score",
  fill = "orange", point_color = "brown"
)

# Boxplot for Previous Score
plot_distribution(
  UTS_Mike_Zidane, "previous_scores",
  title = "Distribution of Previous Scores", y_label = "Previous Score",
  fill = "plum", point_color = "purple"
)

# Linear regression

library(ggplot2)

# Scatter plot of exam score against hours studied, overlaid with a fitted
# linear-model line (confidence band switched off).
UTS_Mike_Zidane |>
  ggplot(aes(x = hours_studied, y = exam_score)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Relationship between Hours Studied and Exam Score",
    x = "Hours Studied",
    y = "Exam Score"
  )
## `geom_smooth()` using formula = 'y ~ x'

library(ggplot2)

# Scatter plot of exam score against sleep hours, overlaid with a fitted
# linear-model line (confidence band switched off).
ggplot(
  data = UTS_Mike_Zidane,
  mapping = aes(x = sleep_hours, y = exam_score)
) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    x = "sleep hours",
    y = "Exam Score",
    title = "Relationship between sleep hours and Exam Score"
  )
## `geom_smooth()` using formula = 'y ~ x'

library(ggplot2)

# Scatter plot of exam score against attendance percentage, overlaid with a
# fitted linear-model line (confidence band switched off).
UTS_Mike_Zidane |>
  ggplot(mapping = aes(x = attendance_percent, y = exam_score)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Relationship between attendance percent and Exam Score",
    x = "Attendance percent",
    y = "Exam Score"
  )
## `geom_smooth()` using formula = 'y ~ x'

library(ggplot2)

# Scatter plot of exam score against previous score, overlaid with a fitted
# linear-model line (confidence band switched off).
ggplot(
  data = UTS_Mike_Zidane,
  mapping = aes(x = previous_scores, y = exam_score)
) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    y = "Exam Score",
    x = "Previous Score",
    title = "Relationship between Previous Score and Exam Score"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Correlation (cor() default method) of each predictor with the exam score.
with(UTS_Mike_Zidane, cor(hours_studied, exam_score))
## [1] 0.7723778
with(UTS_Mike_Zidane, cor(sleep_hours, exam_score))
## [1] 0.1765774
with(UTS_Mike_Zidane, cor(attendance_percent, exam_score))
## [1] 0.2131541
with(UTS_Mike_Zidane, cor(previous_scores, exam_score))
## [1] 0.3981284
# Correlation heatmap visualization
library(corrplot)
## corrplot 0.95 loaded
# Keep only the numeric columns (student_id is a character id).
num_data <- UTS_Mike_Zidane |>
  dplyr::select(
    hours_studied, sleep_hours, attendance_percent,
    previous_scores, exam_score
  )

# Compute the pairwise correlation matrix.
corr_matrix <- cor(num_data)

# Draw the correlation map: coloured upper triangle with the coefficients
# printed in each cell; the top margin is widened to leave room for the title.
corrplot(
  corr_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45,
  title = "Correlation Heatmap of Student Variables",
  mar = c(0, 0, 2, 0)
)

Conclusion

Based on the descriptive analysis, it can be concluded that:

  • Students who spend more time studying tend to achieve higher exam scores.

  • Sleep hours show only a weak positive correlation with exam score (r ≈ 0.18); adequate rest may support academic performance, but this sample does not show a strong effect.

  • Attendance percentage (r ≈ 0.21) and previous academic scores (r ≈ 0.40) also show a positive, though weak to moderate, relationship with exam outcomes.

  • Overall, study hours are the factor most strongly associated with students’ exam success (r ≈ 0.77), while sleep and consistent attendance are positively but only weakly associated with it.

These findings highlight the importance of effective time management and maintaining a healthy study-life balance to optimize academic results.