library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

# Load the data set ----

# The file is semicolon-delimited; column-type messages are silenced.
# NOTE(review): the absolute path only resolves on the author's machine, and
# the name `mpg` masks ggplot2's built-in `mpg` dataset for the rest of the
# script.
mpg <- read_delim(
  file = "C:/Users/kondo/OneDrive/Desktop/INTRO to Statistics and R/Data Set and work/data.csv",
  delim = ";",
  show_col_types = FALSE
)
glimpse(mpg)
## Rows: 4,424
## Columns: 37
## $ `Marital status`                                 <dbl> 1, 1, 1, 1, 2, 2, 1, …
## $ `Application mode`                               <dbl> 17, 15, 1, 17, 39, 39…
## $ `Application order`                              <dbl> 5, 1, 5, 2, 1, 1, 1, …
## $ Course                                           <dbl> 171, 9254, 9070, 9773…
## $ `Daytime/evening attendance\t`                   <dbl> 1, 1, 1, 1, 0, 0, 1, …
## $ `Previous qualification`                         <dbl> 1, 1, 1, 1, 1, 19, 1,…
## $ `Previous qualification (grade)`                 <dbl> 122.0, 160.0, 122.0, …
## $ Nacionality                                      <dbl> 1, 1, 1, 1, 1, 1, 1, …
## $ `Mother's qualification`                         <dbl> 19, 1, 37, 38, 37, 37…
## $ `Father's qualification`                         <dbl> 12, 3, 37, 37, 38, 37…
## $ `Mother's occupation`                            <dbl> 5, 3, 9, 5, 9, 9, 7, …
## $ `Father's occupation`                            <dbl> 9, 3, 9, 3, 9, 7, 10,…
## $ `Admission grade`                                <dbl> 127.3, 142.5, 124.8, …
## $ Displaced                                        <dbl> 1, 1, 1, 1, 0, 0, 1, …
## $ `Educational special needs`                      <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ Debtor                                           <dbl> 0, 0, 0, 0, 0, 1, 0, …
## $ `Tuition fees up to date`                        <dbl> 1, 0, 0, 1, 1, 1, 1, …
## $ Gender                                           <dbl> 1, 1, 1, 0, 0, 1, 0, …
## $ `Scholarship holder`                             <dbl> 0, 0, 0, 0, 0, 0, 1, …
## $ `Age at enrollment`                              <dbl> 20, 19, 19, 20, 45, 5…
## $ International                                    <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `Curricular units 1st sem (credited)`            <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `Curricular units 1st sem (enrolled)`            <dbl> 0, 6, 6, 6, 6, 5, 7, …
## $ `Curricular units 1st sem (evaluations)`         <dbl> 0, 6, 0, 8, 9, 10, 9,…
## $ `Curricular units 1st sem (approved)`            <dbl> 0, 6, 0, 6, 5, 5, 7, …
## $ `Curricular units 1st sem (grade)`               <dbl> 0.00000, 14.00000, 0.…
## $ `Curricular units 1st sem (without evaluations)` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `Curricular units 2nd sem (credited)`            <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `Curricular units 2nd sem (enrolled)`            <dbl> 0, 6, 6, 6, 6, 5, 8, …
## $ `Curricular units 2nd sem (evaluations)`         <dbl> 0, 6, 0, 10, 6, 17, 8…
## $ `Curricular units 2nd sem (approved)`            <dbl> 0, 6, 0, 5, 6, 5, 8, …
## $ `Curricular units 2nd sem (grade)`               <dbl> 0.00000, 13.66667, 0.…
## $ `Curricular units 2nd sem (without evaluations)` <dbl> 0, 0, 0, 0, 0, 5, 0, …
## $ `Unemployment rate`                              <dbl> 10.8, 13.9, 10.8, 9.4…
## $ `Inflation rate`                                 <dbl> 1.4, -0.3, 1.4, -0.8,…
## $ GDP                                              <dbl> 1.74, 0.79, 1.74, -3.…
## $ Target                                           <chr> "Dropout", "Graduate"…

This data set describes students enrolled in different undergraduate degrees, such as agronomy, design, education, nursing, journalism, management, social service, and technologies. The data is used to analyse student dropout and academic success.

# Per-column summary of the whole table: numeric columns get min, quartiles,
# mean and max; the character column `Target` only gets length/class/mode.
# NOTE(review): several numeric columns (e.g. `Course`, `Application mode`)
# look like integer category codes, so their means and quartiles are
# presumably not meaningful — confirm against the data dictionary.
summary(mpg)
##  Marital status  Application mode Application order     Course    
##  Min.   :1.000   Min.   : 1.00    Min.   :0.000     Min.   :  33  
##  1st Qu.:1.000   1st Qu.: 1.00    1st Qu.:1.000     1st Qu.:9085  
##  Median :1.000   Median :17.00    Median :1.000     Median :9238  
##  Mean   :1.179   Mean   :18.67    Mean   :1.728     Mean   :8857  
##  3rd Qu.:1.000   3rd Qu.:39.00    3rd Qu.:2.000     3rd Qu.:9556  
##  Max.   :6.000   Max.   :57.00    Max.   :9.000     Max.   :9991  
##  Daytime/evening attendance\t Previous qualification
##  Min.   :0.0000               Min.   : 1.000        
##  1st Qu.:1.0000               1st Qu.: 1.000        
##  Median :1.0000               Median : 1.000        
##  Mean   :0.8908               Mean   : 4.578        
##  3rd Qu.:1.0000               3rd Qu.: 1.000        
##  Max.   :1.0000               Max.   :43.000        
##  Previous qualification (grade)  Nacionality      Mother's qualification
##  Min.   : 95.0                  Min.   :  1.000   Min.   : 1.00         
##  1st Qu.:125.0                  1st Qu.:  1.000   1st Qu.: 2.00         
##  Median :133.1                  Median :  1.000   Median :19.00         
##  Mean   :132.6                  Mean   :  1.873   Mean   :19.56         
##  3rd Qu.:140.0                  3rd Qu.:  1.000   3rd Qu.:37.00         
##  Max.   :190.0                  Max.   :109.000   Max.   :44.00         
##  Father's qualification Mother's occupation Father's occupation Admission grade
##  Min.   : 1.00          Min.   :  0.00      Min.   :  0.00      Min.   : 95.0  
##  1st Qu.: 3.00          1st Qu.:  4.00      1st Qu.:  4.00      1st Qu.:117.9  
##  Median :19.00          Median :  5.00      Median :  7.00      Median :126.1  
##  Mean   :22.28          Mean   : 10.96      Mean   : 11.03      Mean   :127.0  
##  3rd Qu.:37.00          3rd Qu.:  9.00      3rd Qu.:  9.00      3rd Qu.:134.8  
##  Max.   :44.00          Max.   :194.00      Max.   :195.00      Max.   :190.0  
##    Displaced      Educational special needs     Debtor      
##  Min.   :0.0000   Min.   :0.00000           Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000           1st Qu.:0.0000  
##  Median :1.0000   Median :0.00000           Median :0.0000  
##  Mean   :0.5484   Mean   :0.01153           Mean   :0.1137  
##  3rd Qu.:1.0000   3rd Qu.:0.00000           3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.00000           Max.   :1.0000  
##  Tuition fees up to date     Gender       Scholarship holder Age at enrollment
##  Min.   :0.0000          Min.   :0.0000   Min.   :0.0000     Min.   :17.00    
##  1st Qu.:1.0000          1st Qu.:0.0000   1st Qu.:0.0000     1st Qu.:19.00    
##  Median :1.0000          Median :0.0000   Median :0.0000     Median :20.00    
##  Mean   :0.8807          Mean   :0.3517   Mean   :0.2484     Mean   :23.27    
##  3rd Qu.:1.0000          3rd Qu.:1.0000   3rd Qu.:0.0000     3rd Qu.:25.00    
##  Max.   :1.0000          Max.   :1.0000   Max.   :1.0000     Max.   :70.00    
##  International     Curricular units 1st sem (credited)
##  Min.   :0.00000   Min.   : 0.00                      
##  1st Qu.:0.00000   1st Qu.: 0.00                      
##  Median :0.00000   Median : 0.00                      
##  Mean   :0.02486   Mean   : 0.71                      
##  3rd Qu.:0.00000   3rd Qu.: 0.00                      
##  Max.   :1.00000   Max.   :20.00                      
##  Curricular units 1st sem (enrolled) Curricular units 1st sem (evaluations)
##  Min.   : 0.000                      Min.   : 0.000                        
##  1st Qu.: 5.000                      1st Qu.: 6.000                        
##  Median : 6.000                      Median : 8.000                        
##  Mean   : 6.271                      Mean   : 8.299                        
##  3rd Qu.: 7.000                      3rd Qu.:10.000                        
##  Max.   :26.000                      Max.   :45.000                        
##  Curricular units 1st sem (approved) Curricular units 1st sem (grade)
##  Min.   : 0.000                      Min.   : 0.00                   
##  1st Qu.: 3.000                      1st Qu.:11.00                   
##  Median : 5.000                      Median :12.29                   
##  Mean   : 4.707                      Mean   :10.64                   
##  3rd Qu.: 6.000                      3rd Qu.:13.40                   
##  Max.   :26.000                      Max.   :18.88                   
##  Curricular units 1st sem (without evaluations)
##  Min.   : 0.0000                               
##  1st Qu.: 0.0000                               
##  Median : 0.0000                               
##  Mean   : 0.1377                               
##  3rd Qu.: 0.0000                               
##  Max.   :12.0000                               
##  Curricular units 2nd sem (credited) Curricular units 2nd sem (enrolled)
##  Min.   : 0.0000                     Min.   : 0.000                     
##  1st Qu.: 0.0000                     1st Qu.: 5.000                     
##  Median : 0.0000                     Median : 6.000                     
##  Mean   : 0.5418                     Mean   : 6.232                     
##  3rd Qu.: 0.0000                     3rd Qu.: 7.000                     
##  Max.   :19.0000                     Max.   :23.000                     
##  Curricular units 2nd sem (evaluations) Curricular units 2nd sem (approved)
##  Min.   : 0.000                         Min.   : 0.000                     
##  1st Qu.: 6.000                         1st Qu.: 2.000                     
##  Median : 8.000                         Median : 5.000                     
##  Mean   : 8.063                         Mean   : 4.436                     
##  3rd Qu.:10.000                         3rd Qu.: 6.000                     
##  Max.   :33.000                         Max.   :20.000                     
##  Curricular units 2nd sem (grade)
##  Min.   : 0.00                   
##  1st Qu.:10.75                   
##  Median :12.20                   
##  Mean   :10.23                   
##  3rd Qu.:13.33                   
##  Max.   :18.57                   
##  Curricular units 2nd sem (without evaluations) Unemployment rate
##  Min.   : 0.0000                                Min.   : 7.60    
##  1st Qu.: 0.0000                                1st Qu.: 9.40    
##  Median : 0.0000                                Median :11.10    
##  Mean   : 0.1503                                Mean   :11.57    
##  3rd Qu.: 0.0000                                3rd Qu.:13.90    
##  Max.   :12.0000                                Max.   :16.20    
##  Inflation rate        GDP               Target         
##  Min.   :-0.800   Min.   :-4.060000   Length:4424       
##  1st Qu.: 0.300   1st Qu.:-1.700000   Class :character  
##  Median : 1.400   Median : 0.320000   Mode  :character  
##  Mean   : 1.228   Mean   : 0.001969                     
##  3rd Qu.: 2.600   3rd Qu.: 1.790000                     
##  Max.   : 3.700   Max.   : 3.510000

Numeric summaries of selected columns:

Let’s find the minimum and maximum age of the enrolled students.

# Age range of the enrolled students.
min(mpg[["Age at enrollment"]])
## [1] 17
max(mpg[["Age at enrollment"]])
## [1] 70
# The youngest student enrolled at 17 and the oldest at 70.

# Average admission grade.
mean(mpg[["Admission grade"]])
## [1] 126.9781
# The average admission grade is 126.9781.

# Quartiles of the marital-status and course columns.
quantile(mpg[["Marital status"]])
##   0%  25%  50%  75% 100% 
##    1    1    1    1    6
quantile(mpg[["Course"]])
##   0%  25%  50%  75% 100% 
##   33 9085 9238 9556 9991
# The median course value is 9238, i.e. half of the students have a course
# value at or below 9238.
# NOTE(review): `Course` looks like a category code, so these quartiles do not
# say what share of students takes a given course; table(mpg$Course) would
# give per-course counts — confirm.

# Economic indicators.
median(mpg[["Unemployment rate"]])
## [1] 11.1
mean(mpg[["Inflation rate"]])
## [1] 1.228029

# Number of students per outcome.
table(mpg[["Target"]])
## 
##  Dropout Enrolled Graduate 
##     1421      794     2209
# The Target column holds 1421 dropouts, 794 enrolled and 2209 graduates.

# Averages of the first-semester curricular-unit columns.
mean(mpg[["Curricular units 1st sem (credited)"]])
## [1] 0.709991
mean(mpg[["Curricular units 1st sem (approved)"]])
## [1] 4.7066
mean(mpg[["Curricular units 1st sem (evaluations)"]])
## [1] 8.299051
mean(mpg[["Curricular units 1st sem (grade)"]])
## [1] 10.64082

Visualizing the data with various charts.

# Age at enrollment by gender. `Gender` is stored as a 0/1 code, so it is
# converted to a factor to get a discrete axis; a boxplot summarises each
# group's distribution instead of stacking thousands of points on two x values.
ggplot(mpg, aes(x = factor(Gender), y = `Age at enrollment`)) +
  geom_boxplot() +
  labs(x = "Gender")

# Distribution of GDP in bins of width 1.
ggplot(mpg, aes(x = GDP)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black")

# Number of students per course, split by outcome. `Target` is a character
# column, so it cannot serve as a bar height with stat = "identity"; the
# default count stat tallies rows and `fill` shows the outcome mix.
ggplot(mpg, aes(x = factor(Course), fill = Target)) +
  geom_bar() +
  labs(x = "Course", y = "Number of students")

# Unemployment rate by outcome.
ggplot(mpg, aes(x = Target, y = `Unemployment rate`)) +
  geom_boxplot()

# First-semester grade by course. `Course` is converted to a factor so each
# distinct value gets its own axis position; "identity" is already
# geom_point()'s default stat, so it is not passed explicitly.
ggplot(mpg, aes(x = factor(Course), y = `Curricular units 1st sem (grade)`)) +
  geom_point() +
  labs(x = "Course")

# Application order against application mode.
ggplot(mpg, aes(x = `Application mode`, y = `Application order`)) +
  geom_point()