MBA Starting Salaries Analysis

Reading the data and generating the summary

# Load the survey data from the CSV in the working directory and print
# a per-column five-number summary.
MBA_start <- read.csv(file = "MBA Starting Salaries Data.csv")
summary(object = MBA_start)

##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0

library(psych)
# Extended descriptive statistics (sd, trimmed mean, skew, kurtosis, ...)
# for every column, using psych's describe().
psych::describe(x = MBA_start)

##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

Dividing the data into different data frames

# Split the survey data according to how the `salary` column is coded:
#   0     -> not placed
#   998   -> did not answer the survey
#   999   -> did not disclose the starting salary
#   > 999 -> an actual starting salary
# which() is kept so that rows whose condition evaluates to NA are dropped
# instead of producing all-NA rows.

# Dataframe for students who were placed
Placed <- MBA_start[which(MBA_start$salary > 999), ]

# Dataframe for students who were not placed
Not_Placed <- MBA_start[which(MBA_start$salary == 0), ]

# Dataframe for students who did not disclose their starting salary
Salary_not_disclosed <- MBA_start[which(MBA_start$salary == 999), ]

# Dataframe for students who did not answer the survey
Not_answered <- MBA_start[which(MBA_start$salary == 998), ]

# Dataframe for students whose starting salary is known
# (either not placed, i.e. 0, or an actual salary)
Salary_known <- MBA_start[which((MBA_start$salary == 0) | (MBA_start$salary > 999)), ]

# Dataframe for students whose satisfaction is known
# (only values below 8 are kept)
Satis_known <- MBA_start[which(MBA_start$satis < 8), ]

# View() opens an interactive data viewer, so only call it from an
# interactive session; when the script is run with Rscript or knitted
# there is no viewer to show the tables in.
if (interactive()) {
  View(Placed)
  View(Not_Placed)
  View(Salary_not_disclosed)
  View(Not_answered)
  View(Salary_known)
  View(Satis_known)
}

Creating Box Plot for Age

# Horizontal box plot of the age column for all surveyed students.
boxplot(
  MBA_start$age,
  main = "Box Plot for Age",
  xlab = "Age",
  col = "blue",
  horizontal = TRUE
)

Creating Box Plot for GMAT Total

# Horizontal box plot of the total GMAT score (gmat_tot).
boxplot(
  MBA_start$gmat_tot,
  main = "Box Plot for GMAT Total",
  xlab = "GMAT Scores",
  col = "green",
  horizontal = TRUE
)

Creating Box Plot for GMAT percentile for Quants Section

# Horizontal box plot of the GMAT quantitative percentile (gmat_qpc).
boxplot(
  MBA_start$gmat_qpc,
  main = "Box Plot for GMAT Percentile for Quants Section",
  xlab = "Percentile",
  col = "Yellow",
  horizontal = TRUE
)

Creating Box Plot for GMAT percentile for Verbal Section

# Horizontal box plot of the GMAT verbal percentile (gmat_vpc).
boxplot(
  MBA_start$gmat_vpc,
  main = "Box Plot for GMAT Percentile for Verbal Section ",
  xlab = "Percentile",
  col = "darkgreen",
  horizontal = TRUE
)

Creating Box Plot for GMAT percentile for overall GMAT Exam

# Horizontal box plot of the overall GMAT percentile (gmat_tpc).
# The box is filled black, and the median line is drawn in black by
# default, so it would be invisible inside the box; medcol = "white"
# keeps the original fill colour while making the median readable.
boxplot(MBA_start$gmat_tpc,
        horizontal = TRUE,
        main = "Box Plot for GMAT Percentile (OVERALL) ",
        xlab =  "Percentile",
        col  =  "black",
        medcol = "white"
       )

Creating Box Plot for Spring MBA Average

# Horizontal box plot of the Spring MBA average (s_avg).
boxplot(
  MBA_start$s_avg,
  main = "Box Plot for Spring MBA Average ",
  xlab = "Average",
  col = "purple",
  horizontal = TRUE
)

Creating Box Plot for Fall MBA Average

# Horizontal box plot of the Fall MBA average (f_avg).
boxplot(
  MBA_start$f_avg,
  main = "Box Plot for Fall MBA Average ",
  xlab = "Average",
  col = "tan",
  horizontal = TRUE
)

Creating Box Plot for Work Experience years

# Horizontal box plot of years of work experience (work_yrs).
boxplot(
  MBA_start$work_yrs,
  main = "Box Plot for Years of Work Experience ",
  xlab = "Years",
  col = "magenta",
  horizontal = TRUE
)

Creating Box Plot for Starting Salary

# Horizontal box plot of salary for the Salary_known subset, which holds
# both the salary == 0 rows and the rows with salary above 999.
boxplot(
  Salary_known$salary,
  main = "Box Plot for Starting Salary ",
  xlab = "Salary ",
  col = "lightgreen",
  horizontal = TRUE
)

Creating Bar Plot for Gender Distribution

# Count students per value of `sex` and draw the counts as a bar plot.
# NOTE(review): the labels assume sex is coded 1 = Male, 2 = Female --
# confirm against the data dictionary.
cnt_gender <- table(MBA_start$sex)

# The y-axis limit is derived from the data: a fixed upper limit of 105 is
# lower than the largest group (roughly 206 of the 274 rows are coded 1),
# so that bar would run past the top of the axis.
barplot(cnt_gender,
        width = 1, space = 1,
        main = "Gender Distribution", xlab = "Gender",
        col = c("blue", "red"),
        names.arg = c("Male", "Female"),
        ylim = c(0, max(cnt_gender) * 1.1),
        xlim = c(0, 10))

Creating Bar Plot for First Language

# Count students per value of `frstlang` and draw the counts as a bar plot.
# NOTE(review): the labels assume frstlang is coded 1 = English, 2 = Other --
# confirm against the data dictionary.
first_lang <- table(MBA_start$frstlang)

# The y-axis limit is derived from the data: a fixed upper limit of 105 is
# lower than the largest group (roughly 242 of the 274 rows are coded 1),
# so that bar would run past the top of the axis.
barplot(first_lang,
        width = 1, space = 1,
        main = "First Language of Students ", xlab = "Language",
        col = c("yellow", "green"),
        names.arg = c("English", "Other"),
        ylim = c(0, max(first_lang) * 1.1),
        xlim = c(0, 10))

Creating Histogram for Satisfaction Level of MBA students

# Histogram of the satisfaction scores in Satis_known (rows with satis < 8).
hist(
  Satis_known$satis,
  breaks = 5,
  main = "Satisfaction  distribution",
  xlab = "Satisfaction level,1=low 7=high",
  col = "brown"
)

Creating Bar Plot Between Placed and Non-Placed Students

# Add a 0/1 placement flag to Salary_known: 1 when salary is non-zero,
# 0 when salary is exactly 0.
Salary_known$placed <- ifelse(Salary_known$salary != 0, 1, 0)

# Tabulate the flag and plot the two counts side by side.
cnt_placed <- table(Salary_known$placed)
barplot(
  cnt_placed,
  width = 1,
  space = 1,
  main = "Placed vs Not-placed",
  xlab = "Status",
  names.arg = c("Not Placed", "Placed"),
  col = c("green", "red"),
  xlim = c(0, 10),
  ylim = c(0, 105)
)

Creating a Scatter Plot Matrix showing the variation of salary with other variables

library(car)

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

# Scatter plot matrix of salary against GMAT total, Spring/Fall averages,
# age and work experience, with points grouped by sex.
# Salary_known is used instead of the raw MBA_start data because the raw
# salary column still contains the 998 / 999 placeholder codes ("did not
# answer" / "did not disclose"), which would otherwise be plotted as if
# they were real salaries. This also matches the data used for the
# corrgram and the regression.
scatterplotMatrix(
  ~ salary + gmat_tot + s_avg + age + f_avg + work_yrs | sex,
  data = Salary_known,
  main = "Variation of Salary with different variables"
)

Corrgram Generation

library(corrgram)
# Correlogram of every column in Salary_known: shaded cells below the
# diagonal, pie glyphs above it, variable names on the diagonal.
corrgram(
  Salary_known,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram for MBA starting salary analysis"
)

Applying Pearson Chi square Test for Salary and Sex

# Contingency table of salary by sex, followed by Pearson's chi-squared test.
# NOTE(review): salary is cross-tabulated value by value, which gives many
# sparse cells -- likely the cause of the approximation warning.
c1 <- xtabs(formula = ~ salary + sex, data = Salary_known)
chisq.test(x = c1)

## Warning in chisq.test(c1): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  c1
## X-squared = 55.494, df = 42, p-value = 0.07929

Applying Pearson Chi square Test for Quartile Rank and Salary

# Contingency table of salary by quartile rank, followed by Pearson's
# chi-squared test.
# NOTE(review): salary is cross-tabulated value by value, which gives many
# sparse cells -- likely the cause of the approximation warning.
c2 <- xtabs(formula = ~ salary + quarter, data = Salary_known)
chisq.test(x = c2)

## Warning in chisq.test(c2): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  c2
## X-squared = 132.64, df = 126, p-value = 0.3252

Applying Pearson Chi square Test for Salary and First Language

# Contingency table of salary by first language, followed by Pearson's
# chi-squared test.
# NOTE(review): salary is cross-tabulated value by value, which gives many
# sparse cells -- likely the cause of the approximation warning.
c3 <- xtabs(formula = ~ salary + frstlang, data = Salary_known)
chisq.test(x = c3)

## Warning in chisq.test(c3): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  c3
## X-squared = 62.016, df = 42, p-value = 0.02384

Applying Pearson Chi square Test for Placement Status and Sex

# 2 x 2 table of the placement flag by sex, followed by the chi-squared test.
c4 <- xtabs(formula = ~ placed + sex, data = Salary_known)
chisq.test(x = c4)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  c4
## X-squared = 0.29208, df = 1, p-value = 0.5889

Applying Pearson Chi square Test for Placement Status and Age

# Table of the placement flag by age (one column per distinct age),
# followed by Pearson's chi-squared test.
c5 <- xtabs(formula = ~ placed + age, data = Salary_known)
chisq.test(x = c5)

## Warning in chisq.test(c5): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  c5
## X-squared = 27.943, df = 20, p-value = 0.1108

Applying Pearson Chi square Test for Placement Status and Work Experience

# Table of the placement flag by years of work experience (one column per
# distinct value), followed by Pearson's chi-squared test.
c6 <- xtabs(formula = ~ placed + work_yrs, data = Salary_known)
chisq.test(x = c6)

## Warning in chisq.test(c6): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  c6
## X-squared = 24.663, df = 17, p-value = 0.1025

Applying Pearson Chi square Test for Placement Status and First Language

# 2 x 2 table of the placement flag by first language, followed by the
# chi-squared test.
c7 <- xtabs(formula = ~ placed + frstlang, data = Salary_known)
chisq.test(x = c7)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  c7
## X-squared = 0.074127, df = 1, p-value = 0.7854

Correlation Matrix

# Pairwise correlation matrix of all columns in Salary_known
# (including the derived `placed` flag).
cor(x = Salary_known)

##                   age          sex      gmat_tot     gmat_qpc     gmat_vpc
## age       1.000000000 -0.031876273 -1.256220e-01 -0.220590341 -0.006721674
## sex      -0.031876273  1.000000000 -4.351109e-02 -0.167904888  0.099184398
## gmat_tot -0.125622047 -0.043511095  1.000000e+00  0.743099719  0.752906719
## gmat_qpc -0.220590341 -0.167904888  7.430997e-01  1.000000000  0.175497777
## gmat_vpc -0.006721674  0.099184398  7.529067e-01  0.175497777  1.000000000
## gmat_tpc -0.131681932 -0.012849186  8.791496e-01  0.690581939  0.688039929
## s_avg     0.164342257  0.073368077  1.435675e-01  0.019038162  0.190665307
## f_avg    -0.034290725  0.042895288  1.010821e-01  0.130285115  0.033106093
## quarter  -0.076614994 -0.086616877 -8.407099e-02  0.008601267 -0.139400223
## work_yrs  0.871679595 -0.023832548 -1.736909e-01 -0.241384675 -0.041357878
## frstlang  0.097619028 -0.008488358 -9.557089e-02  0.094537575 -0.295162826
## salary   -0.130198680  0.018516965 -5.685962e-05  0.028391635  0.003389965
## satis    -0.073500580 -0.061738773  7.981946e-02 -0.020006117  0.195134711
## placed   -0.205697192  0.050470540  1.491495e-02  0.026982025  0.028880982
##             gmat_tpc       s_avg        f_avg      quarter     work_yrs
## age      -0.13168193  0.16434226 -0.034290725 -0.076614994  0.871679595
## sex      -0.01284919  0.07336808  0.042895288 -0.086616877 -0.023832548
## gmat_tot  0.87914961  0.14356746  0.101082103 -0.084070990 -0.173690863
## gmat_qpc  0.69058194  0.01903816  0.130285115  0.008601267 -0.241384675
## gmat_vpc  0.68803993  0.19066531  0.033106093 -0.139400223 -0.041357878
## gmat_tpc  1.00000000  0.18894788  0.109811857 -0.128533421 -0.166139876
## s_avg     0.18894788  1.00000000  0.520554250 -0.735421726  0.159136628
## f_avg     0.10981186  0.52055425  1.000000000 -0.382421186 -0.047951357
## quarter  -0.12853342 -0.73542173 -0.382421186  1.000000000 -0.126454286
## work_yrs -0.16613988  0.15913663 -0.047951357 -0.126454286  1.000000000
## frstlang -0.10789784 -0.12631935 -0.055830525  0.089504320 -0.002916547
## salary    0.06094464  0.09632412  0.008846655 -0.147257809 -0.053266846
## satis     0.13288434 -0.04639953 -0.114704819  0.067729421 -0.007722658
## placed    0.08264631  0.08063913  0.027460510 -0.127882161 -0.123303946
##              frstlang        salary        satis      placed
## age       0.097619028 -1.301987e-01 -0.073500580 -0.20569719
## sex      -0.008488358  1.851696e-02 -0.061738773  0.05047054
## gmat_tot -0.095570885 -5.685962e-05  0.079819458  0.01491495
## gmat_qpc  0.094537575  2.839164e-02 -0.020006117  0.02698202
## gmat_vpc -0.295162826  3.389965e-03  0.195134711  0.02888098
## gmat_tpc -0.107897839  6.094464e-02  0.132884339  0.08264631
## s_avg    -0.126319350  9.632412e-02 -0.046399534  0.08063913
## f_avg    -0.055830525  8.846655e-03 -0.114704819  0.02746051
## quarter   0.089504320 -1.472578e-01  0.067729421 -0.12788216
## work_yrs -0.002916547 -5.326685e-02 -0.007722658 -0.12330395
## frstlang  1.000000000  7.125825e-03 -0.135986251 -0.03899476
## salary    0.007125825  1.000000e+00  0.156439455  0.96951510
## satis    -0.135986251  1.564395e-01  1.000000000  0.16882557
## placed   -0.038994758  9.695151e-01  0.168825569  1.00000000

Regression Analysis

# Linear regression of salary on demographics, GMAT percentiles, MBA
# averages, work experience, first language and satisfaction, fitted on
# Salary_known. gmat_tot and quarter are not part of the model.
reg <- lm(
  formula = salary ~ age + sex + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + work_yrs + frstlang + satis,
  data = Salary_known
)
summary(object = reg)

## 
## Call:
## lm(formula = salary ~ age + sex + gmat_qpc + gmat_vpc + gmat_tpc + 
##     s_avg + f_avg + work_yrs + frstlang + satis, data = Salary_known)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -93924 -48912  20019  44376 179796 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  40696.2    70338.2   0.579   0.5636  
## age          -4376.4     1916.4  -2.284   0.0235 *
## sex           1749.0     8733.1   0.200   0.8415  
## gmat_qpc      -246.8      442.3  -0.558   0.5775  
## gmat_vpc      -381.8      406.9  -0.938   0.3494  
## gmat_tpc       571.8      652.3   0.876   0.3819  
## s_avg        22054.8    12502.0   1.764   0.0794 .
## f_avg        -5868.9     8794.0  -0.667   0.5054  
## work_yrs      3252.9     2171.5   1.498   0.1359  
## frstlang     13151.7    15529.4   0.847   0.3982  
## satis         9967.1     5137.4   1.940   0.0539 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 52430 on 182 degrees of freedom
## Multiple R-squared:  0.07776,    Adjusted R-squared:  0.02709 
## F-statistic: 1.535 on 10 and 182 DF,  p-value: 0.1301

Summary

According to the performed Analysis on the MBA Starting Salary File, the following insights can be drawn:

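1. In the regression, the predictors with the largest estimated effects on starting salary are satisfaction level (satis), first language (frstlang), Spring MBA average (s_avg), and age. Of these, only age is significant at the 5% level (p = 0.0235); satis (p = 0.0539) and s_avg (p = 0.0794) are significant only at the 10% level, and the model as a whole is not significant (F-test p-value = 0.1301).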

2. The salary offered to students tends to be higher for those with a higher Spring MBA average (s_avg).

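The age estimate comes out to be negative, implying that, with the other variables held constant, older students tend to get a lower starting salary; the work_yrs estimate, in contrast, is positive, i.e. more experienced students tend to get a higher salary.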
With a unit rise in satisfaction level of students, the salary rises by almost 10000 units.
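First language has a large estimated coefficient (13151.7), but it is not statistically significant (p = 0.3982), so the data do not show a reliable effect of first language on the starting salary of MBA students.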
With a unit rise in work experience years, the salary rises by almost 3300 units.

MBA Starting Salaries Analysis

Prashant

January 31, 2018