Dataset Source: https://www.kaggle.com/mohansacharya/graduate-admissions/downloads/graduate-admissions.zip/2

library(ggplot2)
library(viridis)
## Loading required package: viridisLite
# Read the graduate-admissions CSV into a data frame and preview its first
# rows.
# NOTE(review): the path is absolute and machine-specific; confirm it before
# running this script on another machine.
admission <- read.csv(
  file = "C:/Users/sulov/Desktop/Datasets/Admission_Predict.csv"
)
head(admission)
##   Serial.No. GRE.Score TOEFL.Score University.Rating SOP LOR CGPA Research
## 1         25       336         119                 5 4.0 3.5 9.80        1
## 2        144       340         120                 4 4.5 4.0 9.92        1
## 3        203       340         120                 5 4.5 4.5 9.91        1
## 4        204       334         120                 5 4.0 5.0 9.87        1
## 5         72       336         112                 5 5.0 5.0 9.76        1
## 6         82       340         120                 4 5.0 5.0 9.50        1
##   Chance.of.Admit
## 1            0.97
## 2            0.97
## 3            0.97
## 4            0.97
## 5            0.96
## 6            0.96

The table shows each applicant's GRE score, TOEFL score, university rating, Statement of Purpose (SOP) rating, Letter of Recommendation (LOR) rating, CGPA, Research Paper (Yes/No), and chance of admission.

# Scatter plot of chance of admission against GRE score; each point is
# coloured by its GRE score using the viridis "D" option.
gre <- ggplot(
  data = admission,
  mapping = aes(x = GRE.Score, y = Chance.of.Admit, colour = GRE.Score)
) +
  scale_color_viridis(option = "D") +
  geom_point() +
  theme_classic()
# Scatter plot of chance of admission against TOEFL score; each point is
# coloured by its TOEFL score.
# The viridis "D" colour scale is applied here as well, matching the GRE and
# CGPA scatter plots, so the panels stacked by grid.arrange() share one
# palette instead of this one falling back to ggplot2's default gradient.
Toefl <- ggplot(admission) +
  aes(TOEFL.Score, Chance.of.Admit, colour = TOEFL.Score) +
  geom_point() +
  theme_classic() +
  scale_color_viridis(option = "D")
# Scatter plot of chance of admission against CGPA; each point is coloured
# by its CGPA using the viridis "D" option.
# The plot object shares its name with the CGPA column; inside aes() the
# name is looked up in `admission` first, so the column is what gets mapped.
CGPA <- ggplot(
  data = admission,
  mapping = aes(x = CGPA, y = Chance.of.Admit, colour = CGPA)
) +
  scale_color_viridis(option = "D") +
  geom_point() +
  theme_classic()

We will now use the gridExtra package to combine the plots.

library(gridExtra)
# Draw the GRE, TOEFL and CGPA scatter plots on one page, arranged in three
# rows in that order.
grid.arrange(grobs = list(gre, Toefl, CGPA), nrow = 3)

Now, let’s see how the university rating relates to the chance of getting admission.

# Box plots of chance of admission grouped by university rating. The rating
# is wrapped in factor() so each distinct value gets its own box and fill.
ggplot(
  data = admission,
  mapping = aes(
    x = factor(University.Rating),
    y = Chance.of.Admit,
    fill = factor(University.Rating)
  )
) +
  geom_boxplot() +
  labs(x = "University Rating") +
  theme_classic()

Similarly, we visualize the LOR and SOP ratings.

# Box plots of chance of admission grouped by LOR rating. The rating is
# wrapped in factor() so each distinct value gets its own box and fill.
ggplot(
  data = admission,
  mapping = aes(x = factor(LOR), y = Chance.of.Admit, fill = factor(LOR))
) +
  geom_boxplot() +
  labs(x = "LOR Rating") +
  theme_classic()

# Box plots of chance of admission grouped by SOP rating. The rating is
# wrapped in factor() so each distinct value gets its own box and fill.
ggplot(
  data = admission,
  mapping = aes(x = factor(SOP), y = Chance.of.Admit, fill = factor(SOP))
) +
  geom_boxplot() +
  labs(x = "SOP Rating") +
  theme_classic()

Finally, we will draw a correlation plot using the "corrplot" package. Here, we will remove the serial number by creating a new data frame without that column.

library(corrplot)
## corrplot 0.84 loaded
# Build a copy of the data without the Serial.No. column, compute the
# pairwise correlations of the remaining columns, and draw them as numbers.
admission1 <- admission[, names(admission) != "Serial.No.", drop = FALSE]
co <- cor(admission1)
corrplot(co, method = 'number')

With the help of these plots, we can observe that applicants with high CGPA and GRE scores have a high chance of getting admission, while applicants with low CGPA and GRE scores have a very low chance of getting accepted to a college.

Let’s visualize it.

# Scatter plot of CGPA against GRE score, with each point coloured by the
# applicant's chance of admission using the viridis "D" option.
ggplot(
  data = admission,
  mapping = aes(x = GRE.Score, y = CGPA, colour = Chance.of.Admit)
) +
  scale_color_viridis(option = "D") +
  geom_point() +
  theme_classic()

Bibliography

Mohan S Acharya, Asfia Armaan, Aneeta S Antony : A Comparison of Regression Models for Prediction of Graduate Admissions, IEEE International Conference on Computational Intelligence in Data Science 2019