mba salary

MBA SALARY and ADMISSION Case Study

mbasalary <- read.csv("MBA Starting Salaries Data.csv")
summary(mbasalary)

##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0

Adding two columns for better representation

mbasalary$sex1 <- mbasalary$sex
mbasalary$frstlang1 <- mbasalary$frstlang

mbasalary$sex1[mbasalary$sex1==1] <- "Male"
mbasalary$sex1[mbasalary$sex1==2] <- "Female"

mbasalary$frstlang1[mbasalary$frstlang1==1] <- "English"
mbasalary$frstlang1[mbasalary$frstlang1==2] <- "Non English"

Boxplot showing gender distribution in the dataset

a <- table(mbasalary$sex1)
barplot(a, main = "Gender distribution in the dataset")

Boxplots showing various variables and their distribution

boxplot(mbasalary$gmat_tot, horizontal = TRUE, xlab= "GMAT Total Scores", main = "GMAT Total Distribution",las= 1)

boxplot(mbasalary$gmat_qpc, horizontal = TRUE, xlab= "Quantitative GMAT Scores", main = "Quantitative GMAT Score Distribution",las= 1)

boxplot(mbasalary$gmat_vpc, horizontal = TRUE, xlab= "Verbal GMAT Scores", main = "Verbal GMAT Score Distribution",las= 1)

boxplot(mbasalary$gmat_tpc, horizontal = TRUE, xlab= "Overall GMAT Percentile", main = "Overall GMAT Percentile Distribution",las= 1)

boxplot(mbasalary$s_avg, horizontal = TRUE, xlab= "Spring MBA Avg", main = "Spring MBA Avg Distribution",las= 1)

boxplot(mbasalary$f_avg, horizontal = TRUE, xlab= "Fall MBA Avg", main = "Fall MBA Avg Distribution",las= 1)

boxplot(mbasalary$work_yrs, horizontal = TRUE, xlab= "Work Ex", main = "Work Ex Distribution",las= 1)

Boxplot showing distribution of quarters in the dataset

b <- table(mbasalary$quarter)
barplot(b)

Creating a dataframe for students who got a job and declared their salary

yesjob <- mbasalary[which(mbasalary$salary >= 1000),]
boxplot(yesjob$salary, horizontal = TRUE, xlab= "Salary", main = "Salary Distribution",las= 1)

Boxplot depicting satisfaction of students who completed the survey

nosatis <- mbasalary[which(mbasalary$satis < 10),]
boxplot(nosatis$satis, horizontal = TRUE, xlab= "Satisfaction on a Scale of 1-7", main = "Satisfaction Distribution",las= 1)

Scatterplots depicting various relationship between variables for the people who got a job offer

library(car)

## Warning: package 'car' was built under R version 3.4.3

scatterplot(yesjob$sex,yesjob$salary)

scatterplot(yesjob$age,yesjob$salary)

scatterplot(yesjob$gmat_tot,yesjob$salary)

scatterplot(yesjob$work_yrs,yesjob$salary)

scatterplot(yesjob$s_avg,yesjob$salary)

scatterplot(yesjob$f_avg,yesjob$salary)

scatterplot(yesjob$satis,yesjob$salary)

scatterplot(yesjob$salary, yesjob$frstlang)

Scatterplots depicting various relationship between variables for people who completed the satisfaction survey

scatterplot(nosatis$sex,nosatis$satis)

scatterplot(nosatis$satis, nosatis$age)

scatterplot(nosatis$satis, nosatis$gmat_tot)

scatterplot(nosatis$satis, nosatis$work_yrs)

scatterplot(nosatis$satis, nosatis$s_avg)

scatterplot(nosatis$satis, nosatis$f_avg)

Barplot showing distribution of satisfaction based on student’s first langauge

abc <- table(nosatis$frstlang1,nosatis$satis)
barplot(abc,ylab="Frequency",xlab = "Satisfaction on a scale of 1-7",col=c("darkblue","red"),
        legend = rownames(abc), beside=TRUE)

Scatterplot depicting the relation of Student’s score in GMAT and MBA

scatterplot(mbasalary$gmat_tot, mbasalary$s_avg)

scatterplot(mbasalary$gmat_tot, mbasalary$f_avg)

CORRGRAM of all the variables in the original data set

library(corrgram)

## Warning: package 'corrgram' was built under R version 3.4.3

corrgram(mbasalary[c(1:13)], order=TRUE,
         main="MBA SALARY CORRGRAM",
         lower.panel=panel.shade, upper.panel=panel.pie,
         diag.panel=panel.minmax, text.panel=panel.txt)

Correlation between all the variables in the original data set

cor(mbasalary[c(1:13)])

##                  age          sex    gmat_tot    gmat_qpc    gmat_vpc
## age       1.00000000 -0.028106442 -0.14593840 -0.21616985 -0.04417547
## sex      -0.02810644  1.000000000 -0.05336820 -0.16377435  0.07488782
## gmat_tot -0.14593840 -0.053368202  1.00000000  0.72473781  0.74839187
## gmat_qpc -0.21616985 -0.163774346  0.72473781  1.00000000  0.15218014
## gmat_vpc -0.04417547  0.074887816  0.74839187  0.15218014  1.00000000
## gmat_tpc -0.16990307 -0.008090213  0.84779965  0.65137754  0.66621604
## s_avg     0.14970402  0.127115144  0.11311702 -0.02984873  0.20445365
## f_avg    -0.01744806  0.091663891  0.10442409  0.07370455  0.07592225
## quarter  -0.04967221 -0.133533171 -0.09223903  0.03636638 -0.17460736
## work_yrs  0.85829810 -0.011296374 -0.18235434 -0.23660827 -0.06639049
## frstlang  0.05692649  0.001536205 -0.13503402  0.13892774 -0.38980465
## salary   -0.06257355  0.068858628 -0.05497188 -0.04403293 -0.00613934
## satis    -0.12788825 -0.054602220  0.08255770  0.06060004  0.06262375
##              gmat_tpc       s_avg       f_avg       quarter     work_yrs
## age      -0.169903066  0.14970402 -0.01744806 -4.967221e-02  0.858298096
## sex      -0.008090213  0.12711514  0.09166389 -1.335332e-01 -0.011296374
## gmat_tot  0.847799647  0.11311702  0.10442409 -9.223903e-02 -0.182354339
## gmat_qpc  0.651377538 -0.02984873  0.07370455  3.636638e-02 -0.236608270
## gmat_vpc  0.666216035  0.20445365  0.07592225 -1.746074e-01 -0.066390490
## gmat_tpc  1.000000000  0.11736245  0.07973210 -8.303535e-02 -0.173361859
## s_avg     0.117362449  1.00000000  0.55062139 -7.621166e-01  0.129292714
## f_avg     0.079732099  0.55062139  1.00000000 -4.475064e-01 -0.039056921
## quarter  -0.083035351 -0.76211664 -0.44750637  1.000000e+00 -0.086026406
## work_yrs -0.173361859  0.12929271 -0.03905692 -8.602641e-02  1.000000000
## frstlang -0.103362747 -0.13631308 -0.03705695  9.949226e-02 -0.027866747
## salary    0.004930901  0.14583606  0.02944303 -1.643699e-01  0.009023407
## satis     0.092934266 -0.03268664  0.01089273 -1.267198e-05 -0.109255286
##              frstlang       salary         satis
## age       0.056926486 -0.062573547 -1.278882e-01
## sex       0.001536205  0.068858628 -5.460222e-02
## gmat_tot -0.135034017 -0.054971880  8.255770e-02
## gmat_qpc  0.138927742 -0.044032933  6.060004e-02
## gmat_vpc -0.389804653 -0.006139340  6.262375e-02
## gmat_tpc -0.103362747  0.004930901  9.293427e-02
## s_avg    -0.136313080  0.145836062 -3.268664e-02
## f_avg    -0.037056954  0.029443027  1.089273e-02
## quarter   0.099492259 -0.164369865 -1.267198e-05
## work_yrs -0.027866747  0.009023407 -1.092553e-01
## frstlang  1.000000000 -0.086592096  7.932264e-02
## salary   -0.086592096  1.000000000 -3.352171e-01
## satis     0.079322637 -0.335217114  1.000000e+00

Creating a new data set of students that were not placed and then comparing thefirst language and gender for all 3 groups: All students, Not placed students and Placed Students in order

nojob <- mbasalary
nojob <- mbasalary[which(mbasalary$salary == 0),]

print("All Students")

## [1] "All Students"

table(mbasalary$frstlang1,mbasalary$sex1)

##              
##               Female Male
##   English         60  182
##   Non English      8   24

print("Not placed Students")

## [1] "Not placed Students"

table(nojob$frstlang1,nojob$sex1)

##              
##               Female Male
##   English         22   60
##   Non English      1    7

print("Placed Students")

## [1] "Placed Students"

table(yesjob$frstlang1,yesjob$sex1)

##              
##               Female Male
##   English         28   68
##   Non English      3    4

nojob <- mbasalary
nojob <- mbasalary[which(mbasalary$salary == 0),]

print("All Students")

## [1] "All Students"

table(mbasalary$frstlang1,mbasalary$sex1)

##              
##               Female Male
##   English         60  182
##   Non English      8   24

print("Not placed Students")

## [1] "Not placed Students"

table(nojob$frstlang1,nojob$sex1)

##              
##               Female Male
##   English         22   60
##   Non English      1    7

print("Placed Students")

## [1] "Placed Students"

table(yesjob$frstlang1,yesjob$sex1)

##              
##               Female Male
##   English         28   68
##   Non English      3    4

Adding one column to the mbasalary dataframe for better visualiztion

mbasalary$salary1 <- mbasalary$salary

mbasalary$salary1[mbasalary$salary1>1000] <- "Got an offer"
mbasalary$salary1[mbasalary$salary1==0] <- "Job less"
mbasalary$salary1[mbasalary$salary1==999] <- "Not Applicable"
mbasalary$salary1[mbasalary$salary1==998] <- "Not Applicable"

Creating tables to visualze various factors that may impact job placement

table(mbasalary$salary1,mbasalary$sex1)

##                 
##                  Female Male
##   Got an offer       31   72
##   Job less           23   67
##   Not Applicable     14   67

table(mbasalary$salary1,mbasalary$frstlang1)

##                 
##                  English Non English
##   Got an offer        96           7
##   Job less            82           8
##   Not Applicable      64          17

print("now comparing job placement vs satisfaction level")

## [1] "now comparing job placement vs satisfaction level"

table(mbasalary$salary1,mbasalary$satis)

##                 
##                   1  2  3  4  5  6  7 998
##   Got an offer    0  0  1  1 29 50 22   0
##   Job less        0  0  0  4 36 40 10   0
##   Not Applicable  1  1  4 12  9  7  1  46

Chi-sq test to check for association between Job placement and gender

chisq.test(mbasalary$salary1,mbasalary$sex1)

## 
##  Pearson's Chi-squared test
## 
## data:  mbasalary$salary1 and mbasalary$sex1
## X-squared = 4.0288, df = 2, p-value = 0.1334

Chi-sq test to check for association between Job placement and first language

chisq.test(mbasalary$salary1,mbasalary$frstlang1)

## 
##  Pearson's Chi-squared test
## 
## data:  mbasalary$salary1 and mbasalary$frstlang1
## X-squared = 9.8645, df = 2, p-value = 0.00721

Various t-test to check for association between Salary offered and various related factors

t.test(yesjob$salary,yesjob$age)

## 
##  Welch Two Sample t-test
## 
## data:  yesjob$salary and yesjob$age
## t = 58.503, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   99511.69 106496.23
## sample estimates:
##   mean of x   mean of y 
## 103030.7379     26.7767

t.test(yesjob$salary~yesjob$sex)

## 
##  Welch Two Sample t-test
## 
## data:  yesjob$salary by yesjob$sex
## t = 1.3628, df = 38.115, p-value = 0.1809
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3128.55 16021.72
## sample estimates:
## mean in group 1 mean in group 2 
##       104970.97        98524.39

t.test(yesjob$salary~yesjob$frstlang)

## 
##  Welch Two Sample t-test
## 
## data:  yesjob$salary by yesjob$frstlang
## t = -1.1202, df = 6.0863, p-value = 0.3049
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -59933.62  22202.25
## sample estimates:
## mean in group 1 mean in group 2 
##        101748.6        120614.3

t.test(yesjob$salary,yesjob$gmat_tot)

## 
##  Welch Two Sample t-test
## 
## data:  yesjob$salary and yesjob$gmat_tot
## t = 58.168, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   98922.43 105907.00
## sample estimates:
##   mean of x   mean of y 
## 103030.7379    616.0194

t.test(yesjob$salary,yesjob$s_avg)

## 
##  Welch Two Sample t-test
## 
## data:  yesjob$salary and yesjob$s_avg
## t = 58.516, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   99535.37 106519.92
## sample estimates:
##    mean of x    mean of y 
## 103030.73786      3.09233

t.test(yesjob$salary,yesjob$work_yrs)

## 
##  Welch Two Sample t-test
## 
## data:  yesjob$salary and yesjob$work_yrs
## t = 58.516, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   99534.79 106519.33
## sample estimates:
##    mean of x    mean of y 
## 1.030307e+05 3.679612e+00

Using linear regression to build a model for predicting Salary using multiple factors

predi <- lm(yesjob$salary~yesjob$sex+yesjob$work_yrs+yesjob$gmat_tot+yesjob$s_avg+yesjob$f_avg+yesjob$frstlang)
summary(predi)

## 
## Call:
## lm(formula = yesjob$salary ~ yesjob$sex + yesjob$work_yrs + yesjob$gmat_tot + 
##     yesjob$s_avg + yesjob$f_avg + yesjob$frstlang)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -32652  -8940  -1709   5186  83182 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     82356.30   24206.88   3.402 0.000976 ***
## yesjob$sex      -5886.39    3462.79  -1.700 0.092388 .  
## yesjob$work_yrs  2201.83     579.92   3.797 0.000257 ***
## yesjob$gmat_tot   -11.90      31.77  -0.375 0.708712    
## yesjob$s_avg     4851.02    4986.79   0.973 0.333110    
## yesjob$f_avg    -1153.74    3822.28  -0.302 0.763422    
## yesjob$frstlang 15101.77    6473.46   2.333 0.021743 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15760 on 96 degrees of freedom
## Multiple R-squared:  0.2678, Adjusted R-squared:  0.2221 
## F-statistic: 5.853 on 6 and 96 DF,  p-value: 3.114e-05

Using linear regression to build a model for predicting Salary using Work-ex and First language

predi1 <- lm(yesjob$salary~yesjob$work_yrs+yesjob$frstlang)
summary(predi1)

## 
## Call:
## lm(formula = yesjob$salary ~ yesjob$work_yrs + yesjob$frstlang)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33972  -8955   -455   4545  76681 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      79941.4     6788.8  11.775  < 2e-16 ***
## yesjob$work_yrs   2483.3      527.9   4.704 8.18e-06 ***
## yesjob$frstlang  13064.0     6283.2   2.079   0.0402 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15740 on 100 degrees of freedom
## Multiple R-squared:  0.2396, Adjusted R-squared:  0.2244 
## F-statistic: 15.75 on 2 and 100 DF,  p-value: 1.128e-06

Using linear regression to build a model for predicting Salary using Gender, Gmat Total, and MBA performance

predi2  <- lm(yesjob$salary~yesjob$sex+yesjob$gmat_tot+yesjob$s_avg+yesjob$f_avg)
summary(predi2)

## 
## Call:
## lm(formula = yesjob$salary ~ yesjob$sex + yesjob$gmat_tot + yesjob$s_avg + 
##     yesjob$f_avg)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -39889  -7926  -2357   4047 120521 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     122912.34   24422.85   5.033  2.2e-06 ***
## yesjob$sex       -6158.11    3825.41  -1.610   0.1107    
## yesjob$gmat_tot    -38.61      34.87  -1.107   0.2708    
## yesjob$s_avg      9603.44    5173.84   1.856   0.0664 .  
## yesjob$f_avg     -5752.29    4028.95  -1.428   0.1565    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17550 on 98 degrees of freedom
## Multiple R-squared:  0.07369,    Adjusted R-squared:  0.03588 
## F-statistic: 1.949 on 4 and 98 DF,  p-value: 0.1084

From the above analysis we see that the best predictor for salary are Gender and First language.

Using tables to visualize Job Placement and Gender or First language

table(mbasalary$salary1,mbasalary$sex1)

##                 
##                  Female Male
##   Got an offer       31   72
##   Job less           23   67
##   Not Applicable     14   67

table(mbasalary$salary1,mbasalary$frstlang1)

##                 
##                  English Non English
##   Got an offer        96           7
##   Job less            82           8
##   Not Applicable      64          17

Performing Chi-sq test to check association between various variables

chisq.test(mbasalary$salary1,mbasalary$sex1)

## 
##  Pearson's Chi-squared test
## 
## data:  mbasalary$salary1 and mbasalary$sex1
## X-squared = 4.0288, df = 2, p-value = 0.1334

chisq.test(mbasalary$salary1,mbasalary$frstlang1)

## 
##  Pearson's Chi-squared test
## 
## data:  mbasalary$salary1 and mbasalary$frstlang1
## X-squared = 9.8645, df = 2, p-value = 0.00721

chisq.test(mbasalary$frstlang1,mbasalary$sex1)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mbasalary$frstlang1 and mbasalary$sex1
## X-squared = 2.4871e-29, df = 1, p-value = 1

Creating a new dataframe to perform logistic regression

mbasalary12 <- mbasalary
mbasalary12$salary2 <- mbasalary12$salary
mbasalary12<-mbasalary12[mbasalary12$salary2!=998,]
mbasalary12$salary2[mbasalary12$salary2>1] <- 1
mbasalary12$salary2[mbasalary12$salary2==0] <- 0

Removing actual salary data from the dataframe and diving it into two parts for training and testing.

We are using 90:10 split for the dataframe

mbasalary12 <- mbasalary12[-c(12)]

train <- mbasalary12[1:205,]
test <- mbasalary12[206:228,]

Creating the logistics regression model

model <- glm(salary2 ~ age + sex+ gmat_tot+gmat_qpc+gmat_vpc+ gmat_tpc + s_avg +f_avg + quarter + work_yrs+frstlang + satis ,family=binomial(link='logit'),data=mbasalary12)
summary(model)

## 
## Call:
## glm(formula = salary2 ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + 
##     gmat_tpc + s_avg + f_avg + quarter + work_yrs + frstlang + 
##     satis, family = binomial(link = "logit"), data = mbasalary12)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9069  -1.2617   0.8233   0.9796   1.5543  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  7.229484   3.907020   1.850   0.0643 .
## age         -0.182398   0.075593  -2.413   0.0158 *
## sex         -0.071435   0.332632  -0.215   0.8300  
## gmat_tot    -0.006845   0.010361  -0.661   0.5088  
## gmat_qpc     0.012925   0.028238   0.458   0.6472  
## gmat_vpc     0.016111   0.027631   0.583   0.5598  
## gmat_tpc     0.011001   0.019087   0.576   0.5644  
## s_avg        0.032437   0.621421   0.052   0.9584  
## f_avg       -0.190403   0.340350  -0.559   0.5759  
## quarter     -0.141159   0.192870  -0.732   0.4642  
## work_yrs     0.097279   0.083493   1.165   0.2440  
## frstlang     0.608889   0.578531   1.052   0.2926  
## satis       -0.158727   0.157510  -1.008   0.3136  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 305.89  on 227  degrees of freedom
## Residual deviance: 291.96  on 215  degrees of freedom
## AIC: 317.96
## 
## Number of Fisher Scoring iterations: 4

Now predicting job placement using the model and checking it’s efficiency.

fitted.results <- predict(model,newdata=subset(test,select=c(1:12)),type='response')
fitted.results <- ifelse(fitted.results > 0.5,1,0)

misClasificError <- mean(fitted.results != test$salary2)
print(paste('Accuracy',1-misClasificError))

## [1] "Accuracy 0.869565217391304"

mba salary

Anshul Nograiya

21 February 2018

MBA SALARY and ADMISSION Case Study

Adding two columns for better representation

Boxplot showing gender distribution in the dataset

Boxplots showing various variables and their distribution

Boxplot showing distribution of quarters in the dataset

Creating a dataframe for students who got a job and declared their salary

Boxplot depicting satisfaction of students who completed the survey

Scatterplots depicting various relationship between variables for the people who got a job offer

Scatterplots depicting various relationship between variables for people who completed the satisfaction survey

Barplot showing distribution of satisfaction based on student’s first langauge

Scatterplot depicting the relation of Student’s score in GMAT and MBA

CORRGRAM of all the variables in the original data set

Correlation between all the variables in the original data set

Creating a new data set of students that were not placed and then comparing thefirst language and gender for all 3 groups: All students, Not placed students and Placed Students in order

Adding one column to the mbasalary dataframe for better visualiztion

Creating tables to visualze various factors that may impact job placement

Chi-sq test to check for association between Job placement and gender

Chi-sq test to check for association between Job placement and first language

Using linear regression to build a model for predicting Salary using multiple factors

Using linear regression to build a model for predicting Salary using Work-ex and First language

Using linear regression to build a model for predicting Salary using Gender, Gmat Total, and MBA performance

Using tables to visualize Job Placement and Gender or First language

Performing Chi-sq test to check association between various variables

Creating a new dataframe to perform logistic regression

Removing actual salary data from the dataframe and diving it into two parts for training and testing.

Creating the logistics regression model

Now predicting job placement using the model and checking it’s efficiency.