library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
# Read the semicolon-delimited student dataset and preview its columns.
# NOTE(review): the name `mpg` masks ggplot2's built-in `mpg` dataset, and the
# absolute Windows path below is machine-specific.
mpg <- read_delim(
  file = "C:/Users/kondo/OneDrive/Desktop/INTRO to Statistics and R/Data Set and work/data.csv",
  delim = ";",
  show_col_types = FALSE
)
glimpse(mpg)
## Rows: 4,424
## Columns: 37
## $ `Marital status` <dbl> 1, 1, 1, 1, 2, 2, 1, …
## $ `Application mode` <dbl> 17, 15, 1, 17, 39, 39…
## $ `Application order` <dbl> 5, 1, 5, 2, 1, 1, 1, …
## $ Course <dbl> 171, 9254, 9070, 9773…
## $ `Daytime/evening attendance\t` <dbl> 1, 1, 1, 1, 0, 0, 1, …
## $ `Previous qualification` <dbl> 1, 1, 1, 1, 1, 19, 1,…
## $ `Previous qualification (grade)` <dbl> 122.0, 160.0, 122.0, …
## $ Nacionality <dbl> 1, 1, 1, 1, 1, 1, 1, …
## $ `Mother's qualification` <dbl> 19, 1, 37, 38, 37, 37…
## $ `Father's qualification` <dbl> 12, 3, 37, 37, 38, 37…
## $ `Mother's occupation` <dbl> 5, 3, 9, 5, 9, 9, 7, …
## $ `Father's occupation` <dbl> 9, 3, 9, 3, 9, 7, 10,…
## $ `Admission grade` <dbl> 127.3, 142.5, 124.8, …
## $ Displaced <dbl> 1, 1, 1, 1, 0, 0, 1, …
## $ `Educational special needs` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ Debtor <dbl> 0, 0, 0, 0, 0, 1, 0, …
## $ `Tuition fees up to date` <dbl> 1, 0, 0, 1, 1, 1, 1, …
## $ Gender <dbl> 1, 1, 1, 0, 0, 1, 0, …
## $ `Scholarship holder` <dbl> 0, 0, 0, 0, 0, 0, 1, …
## $ `Age at enrollment` <dbl> 20, 19, 19, 20, 45, 5…
## $ International <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `Curricular units 1st sem (credited)` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `Curricular units 1st sem (enrolled)` <dbl> 0, 6, 6, 6, 6, 5, 7, …
## $ `Curricular units 1st sem (evaluations)` <dbl> 0, 6, 0, 8, 9, 10, 9,…
## $ `Curricular units 1st sem (approved)` <dbl> 0, 6, 0, 6, 5, 5, 7, …
## $ `Curricular units 1st sem (grade)` <dbl> 0.00000, 14.00000, 0.…
## $ `Curricular units 1st sem (without evaluations)` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `Curricular units 2nd sem (credited)` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `Curricular units 2nd sem (enrolled)` <dbl> 0, 6, 6, 6, 6, 5, 8, …
## $ `Curricular units 2nd sem (evaluations)` <dbl> 0, 6, 0, 10, 6, 17, 8…
## $ `Curricular units 2nd sem (approved)` <dbl> 0, 6, 0, 5, 6, 5, 8, …
## $ `Curricular units 2nd sem (grade)` <dbl> 0.00000, 13.66667, 0.…
## $ `Curricular units 2nd sem (without evaluations)` <dbl> 0, 0, 0, 0, 0, 5, 0, …
## $ `Unemployment rate` <dbl> 10.8, 13.9, 10.8, 9.4…
## $ `Inflation rate` <dbl> 1.4, -0.3, 1.4, -0.8,…
## $ GDP <dbl> 1.74, 0.79, 1.74, -3.…
## $ Target <chr> "Dropout", "Graduate"…
This dataset describes students enrolled in different undergraduate degrees, such as agronomy, design, education, nursing, journalism, management, social service, and technologies. The data is used to analyse student dropout and academic success.
# Column-by-column summary: min, quartiles, mean and max for numeric columns,
# and length/class/mode for the character column.
summary(object = mpg)
## Marital status Application mode Application order Course
## Min. :1.000 Min. : 1.00 Min. :0.000 Min. : 33
## 1st Qu.:1.000 1st Qu.: 1.00 1st Qu.:1.000 1st Qu.:9085
## Median :1.000 Median :17.00 Median :1.000 Median :9238
## Mean :1.179 Mean :18.67 Mean :1.728 Mean :8857
## 3rd Qu.:1.000 3rd Qu.:39.00 3rd Qu.:2.000 3rd Qu.:9556
## Max. :6.000 Max. :57.00 Max. :9.000 Max. :9991
## Daytime/evening attendance\t Previous qualification
## Min. :0.0000 Min. : 1.000
## 1st Qu.:1.0000 1st Qu.: 1.000
## Median :1.0000 Median : 1.000
## Mean :0.8908 Mean : 4.578
## 3rd Qu.:1.0000 3rd Qu.: 1.000
## Max. :1.0000 Max. :43.000
## Previous qualification (grade) Nacionality Mother's qualification
## Min. : 95.0 Min. : 1.000 Min. : 1.00
## 1st Qu.:125.0 1st Qu.: 1.000 1st Qu.: 2.00
## Median :133.1 Median : 1.000 Median :19.00
## Mean :132.6 Mean : 1.873 Mean :19.56
## 3rd Qu.:140.0 3rd Qu.: 1.000 3rd Qu.:37.00
## Max. :190.0 Max. :109.000 Max. :44.00
## Father's qualification Mother's occupation Father's occupation Admission grade
## Min. : 1.00 Min. : 0.00 Min. : 0.00 Min. : 95.0
## 1st Qu.: 3.00 1st Qu.: 4.00 1st Qu.: 4.00 1st Qu.:117.9
## Median :19.00 Median : 5.00 Median : 7.00 Median :126.1
## Mean :22.28 Mean : 10.96 Mean : 11.03 Mean :127.0
## 3rd Qu.:37.00 3rd Qu.: 9.00 3rd Qu.: 9.00 3rd Qu.:134.8
## Max. :44.00 Max. :194.00 Max. :195.00 Max. :190.0
## Displaced Educational special needs Debtor
## Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :1.0000 Median :0.00000 Median :0.0000
## Mean :0.5484 Mean :0.01153 Mean :0.1137
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000
## Tuition fees up to date Gender Scholarship holder Age at enrollment
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :17.00
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:19.00
## Median :1.0000 Median :0.0000 Median :0.0000 Median :20.00
## Mean :0.8807 Mean :0.3517 Mean :0.2484 Mean :23.27
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:25.00
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :70.00
## International Curricular units 1st sem (credited)
## Min. :0.00000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.: 0.00
## Median :0.00000 Median : 0.00
## Mean :0.02486 Mean : 0.71
## 3rd Qu.:0.00000 3rd Qu.: 0.00
## Max. :1.00000 Max. :20.00
## Curricular units 1st sem (enrolled) Curricular units 1st sem (evaluations)
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 5.000 1st Qu.: 6.000
## Median : 6.000 Median : 8.000
## Mean : 6.271 Mean : 8.299
## 3rd Qu.: 7.000 3rd Qu.:10.000
## Max. :26.000 Max. :45.000
## Curricular units 1st sem (approved) Curricular units 1st sem (grade)
## Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.000 1st Qu.:11.00
## Median : 5.000 Median :12.29
## Mean : 4.707 Mean :10.64
## 3rd Qu.: 6.000 3rd Qu.:13.40
## Max. :26.000 Max. :18.88
## Curricular units 1st sem (without evaluations)
## Min. : 0.0000
## 1st Qu.: 0.0000
## Median : 0.0000
## Mean : 0.1377
## 3rd Qu.: 0.0000
## Max. :12.0000
## Curricular units 2nd sem (credited) Curricular units 2nd sem (enrolled)
## Min. : 0.0000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 5.000
## Median : 0.0000 Median : 6.000
## Mean : 0.5418 Mean : 6.232
## 3rd Qu.: 0.0000 3rd Qu.: 7.000
## Max. :19.0000 Max. :23.000
## Curricular units 2nd sem (evaluations) Curricular units 2nd sem (approved)
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 6.000 1st Qu.: 2.000
## Median : 8.000 Median : 5.000
## Mean : 8.063 Mean : 4.436
## 3rd Qu.:10.000 3rd Qu.: 6.000
## Max. :33.000 Max. :20.000
## Curricular units 2nd sem (grade)
## Min. : 0.00
## 1st Qu.:10.75
## Median :12.20
## Mean :10.23
## 3rd Qu.:13.33
## Max. :18.57
## Curricular units 2nd sem (without evaluations) Unemployment rate
## Min. : 0.0000 Min. : 7.60
## 1st Qu.: 0.0000 1st Qu.: 9.40
## Median : 0.0000 Median :11.10
## Mean : 0.1503 Mean :11.57
## 3rd Qu.: 0.0000 3rd Qu.:13.90
## Max. :12.0000 Max. :16.20
## Inflation rate GDP Target
## Min. :-0.800 Min. :-4.060000 Length:4424
## 1st Qu.: 0.300 1st Qu.:-1.700000 Class :character
## Median : 1.400 Median : 0.320000 Mode :character
## Mean : 1.228 Mean : 0.001969
## 3rd Qu.: 2.600 3rd Qu.: 1.790000
## Max. : 3.700 Max. : 3.510000
Numeric summaries of selected columns:
Let’s find the minimum and maximum age of the enrolled students.
# Youngest and oldest age at enrollment in the dataset.
min(mpg[["Age at enrollment"]])
## [1] 17
max(mpg[["Age at enrollment"]])
## [1] 70
# As per the summary, the minimum age of the students who enrolled is 17 and the maximum age is 70 years.
# Let's calculate the average admission grade of the students.
# Arithmetic mean of the admission grade across all students.
mean(mpg[["Admission grade"]])
## [1] 126.9781
# As per the output, the average admission grade is 126.9781.
# Quartiles of the marital-status column.
# NOTE(review): the values look like category codes (1 to 6), so quantiles may
# not be meaningful here; a frequency table could be more informative.
quantile(mpg[["Marital status"]])
## 0% 25% 50% 75% 100%
## 1 1 1 1 6
# Quartiles of the course column.
# NOTE(review): Course appears to hold course codes (e.g. 171, 9254), so these
# quantiles describe the code values, not enrollment shares.
quantile(mpg[["Course"]])
## 0% 25% 50% 75% 100%
## 33 9085 9238 9556 9991
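# As per the quantiles, the median course code is 9238, i.e. half of the students are enrolled in a course whose code is 9238 or lower. Because Course is a code rather than a measurement, this does not mean that 50% of students are enrolled in course 9238.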
# Median of the unemployment rate column.
median(mpg[["Unemployment rate"]])
## [1] 11.1
# Mean of the inflation rate column.
mean(mpg[["Inflation rate"]])
## [1] 1.228029
# Frequency of each outcome in the Target column.
table(mpg[["Target"]])
##
## Dropout Enrolled Graduate
## 1421 794 2209
# As per the table of the Target column, there are 1421 dropouts, 794 enrolled students, and 2209 graduates.
# Mean first-semester curricular-unit figures: credited, approved,
# evaluations and grade, in that order.
mean(mpg[["Curricular units 1st sem (credited)"]])
## [1] 0.709991
mean(mpg[["Curricular units 1st sem (approved)"]])
## [1] 4.7066
mean(mpg[["Curricular units 1st sem (evaluations)"]])
## [1] 8.299051
mean(mpg[["Curricular units 1st sem (grade)"]])
## [1] 10.64082
Visualizing the data with various charts.
# Distribution of age at enrollment for each gender code.
# Gender is stored as a 0/1 code, so it is mapped as a factor and drawn as
# box plots: a scatter plot on a numeric axis stacks every point on two
# vertical lines and hides the shape of the distribution.
ggplot(mpg, aes(x = factor(Gender), y = `Age at enrollment`)) +
  geom_boxplot() +
  labs(x = "Gender (coded 0/1)", y = "Age at enrollment")
# Histogram of GDP using bins that are one unit wide.
ggplot(data = mpg, mapping = aes(x = GDP)) +
  geom_histogram(
    binwidth = 1,
    colour = "black",
    fill = "blue"
  )
# Number of students per course, split by outcome (Target).
# Target is a character column, so it cannot serve as a bar height with
# stat = "identity". Instead the rows are counted per course code (geom_bar's
# default stat) and each bar is filled by Target.
# Course is wrapped in factor() so each code gets its own evenly spaced bar.
ggplot(mpg, aes(x = factor(Course), fill = Target)) +
  geom_bar() +
  labs(x = "Course code", y = "Number of students", fill = "Target") +
  # Rotate the course codes so the labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Box plots comparing the unemployment rate across the Target outcomes.
ggplot(
  data = mpg,
  mapping = aes(x = Target, y = `Unemployment rate`)
) +
  geom_boxplot()
# First-semester grade of every student, grouped by course.
# Course is wrapped in factor() because it appears to hold course codes:
# on a continuous axis the low codes sit far from the 9xxx codes and the
# points bunch together. Semi-transparent points keep dense regions readable.
# stat = "identity" is already geom_point's default, so it is omitted.
ggplot(mpg, aes(x = factor(Course), y = `Curricular units 1st sem (grade)`)) +
  geom_point(alpha = 0.3) +
  labs(x = "Course code", y = "Curricular units 1st sem (grade)") +
  # Rotate the course codes so the labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Scatter plot of application order against application mode.
ggplot(
  data = mpg,
  mapping = aes(x = `Application mode`, y = `Application order`)
) +
  geom_point()