library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the exam-score dataset from CSV into a base data.frame.
# NOTE(review): this is an absolute, machine-specific path; the script will
# only run where that exact file location exists.
data <- utils::read.csv(
  file = "C:/Users/13074/Desktop/course/PA470/Assignment 1/Exam_Score_Prediction.csv"
)
# Print the compact structure: dimensions, column types and leading values.
utils::str(object = data)
## 'data.frame': 20000 obs. of 12 variables:
## $ student_id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ age : int 17 23 22 20 20 23 17 22 18 17 ...
## $ course : chr "diploma" "bca" "b.sc" "diploma" ...
## $ study_hours : num 2.78 3.37 7.88 0.67 0.89 3.48 1.35 5.48 2.89 6.77 ...
## $ class_attendance: num 92.9 64.8 76.8 48.4 71.6 65.4 69 51.1 92 44.8 ...
## $ internet_access : chr "yes" "yes" "yes" "yes" ...
## $ sleep_hours : num 7.4 4.6 8.5 5.8 9.8 4.2 7.4 8.2 6.6 9.8 ...
## $ sleep_quality : chr "poor" "average" "poor" "average" ...
## $ study_method : chr "coaching" "online videos" "coaching" "online videos" ...
## $ facility_rating : chr "low" "medium" "high" "low" ...
## $ exam_difficulty : chr "hard" "moderate" "moderate" "moderate" ...
## $ exam_score : num 58.9 54.8 90.3 29.7 43.7 58.2 53.7 47.3 44.9 77.7 ...
# Transposed preview of the data: one row per column, with its type and the
# first few values.
dplyr::glimpse(data)
## Rows: 20,000
## Columns: 12
## $ student_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
## $ age <int> 17, 23, 22, 20, 20, 23, 17, 22, 18, 17, 21, 24, 22, 2…
## $ course <chr> "diploma", "bca", "b.sc", "diploma", "diploma", "b.te…
## $ study_hours <dbl> 2.78, 3.37, 7.88, 0.67, 0.89, 3.48, 1.35, 5.48, 2.89,…
## $ class_attendance <dbl> 92.9, 64.8, 76.8, 48.4, 71.6, 65.4, 69.0, 51.1, 92.0,…
## $ internet_access <chr> "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes…
## $ sleep_hours <dbl> 7.4, 4.6, 8.5, 5.8, 9.8, 4.2, 7.4, 8.2, 6.6, 9.8, 5.8…
## $ sleep_quality <chr> "poor", "average", "poor", "average", "poor", "good",…
## $ study_method <chr> "coaching", "online videos", "coaching", "online vide…
## $ facility_rating <chr> "low", "medium", "high", "low", "low", "low", "high",…
## $ exam_difficulty <chr> "hard", "moderate", "moderate", "moderate", "moderate…
## $ exam_score <dbl> 58.9, 54.8, 90.3, 29.7, 43.7, 58.2, 53.7, 47.3, 44.9,…
# Count missing values per column: flag each cell as NA / not NA, then total
# the flags column by column.
data %>%
  is.na() %>%
  colSums()
## student_id age course study_hours
## 0 0 0 9
## class_attendance internet_access sleep_hours sleep_quality
## 11 0 0 0
## study_method facility_rating exam_difficulty exam_score
## 0 0 0 0
# Per-column summary of the whole data frame (quantiles and NA counts for
# numeric columns; length/class/mode for character columns).
base::summary(object = data)
## student_id age course study_hours
## Min. : 1 Min. :17.00 Length:20000 Min. :0.080
## 1st Qu.: 5001 1st Qu.:18.00 Class :character 1st Qu.:2.000
## Median :10000 Median :20.00 Mode :character Median :4.040
## Mean :10001 Mean :20.47 Mean :4.008
## 3rd Qu.:15000 3rd Qu.:22.00 3rd Qu.:6.000
## Max. :20001 Max. :24.00 Max. :7.910
## NA's :9
## class_attendance internet_access sleep_hours sleep_quality
## Min. :40.60 Length:20000 Min. :4.100 Length:20000
## 1st Qu.:55.10 Class :character 1st Qu.:5.500 Class :character
## Median :69.90 Mode :character Median :7.000 Mode :character
## Mean :70.02 Mean :7.009
## 3rd Qu.:85.00 3rd Qu.:8.500
## Max. :99.40 Max. :9.900
## NA's :11
## study_method facility_rating exam_difficulty exam_score
## Length:20000 Length:20000 Length:20000 Min. : 19.60
## Class :character Class :character Class :character 1st Qu.: 48.80
## Mode :character Mode :character Mode :character Median : 62.60
## Mean : 62.51
## 3rd Qu.: 76.30
## Max. :100.00
##
# Summarise only the numeric (integer and double) columns.
# NOTE(review): `student_id` is an integer column, so it is included here even
# though it is an identifier rather than a measurement.
summary(
  select(data, where(is.numeric))
)
## student_id age study_hours class_attendance
## Min. : 1 Min. :17.00 Min. :0.080 Min. :40.60
## 1st Qu.: 5001 1st Qu.:18.00 1st Qu.:2.000 1st Qu.:55.10
## Median :10000 Median :20.00 Median :4.040 Median :69.90
## Mean :10001 Mean :20.47 Mean :4.008 Mean :70.02
## 3rd Qu.:15000 3rd Qu.:22.00 3rd Qu.:6.000 3rd Qu.:85.00
## Max. :20001 Max. :24.00 Max. :7.910 Max. :99.40
## NA's :9 NA's :11
## sleep_hours exam_score
## Min. :4.100 Min. : 19.60
## 1st Qu.:5.500 1st Qu.: 48.80
## Median :7.000 Median : 62.60
## Mean :7.009 Mean : 62.51
## 3rd Qu.:8.500 3rd Qu.: 76.30
## Max. :9.900 Max. :100.00
##