Summary

# Load the MBA starting-salaries data set from the working directory.
Salary.df <- read.csv("MBA Starting Salaries Data.csv")
library(psych)
# View() opens a spreadsheet-style viewer, so only call it when a user is
# present; this keeps non-interactive runs (Rscript, knitting) from trying
# to open one.
if (interactive()) {
  View(Salary.df)
}
# Per-column descriptive statistics (n, mean, sd, median, skew, ...).
# NOTE(review): salary contains the values 0, 998 and 999 and satis contains
# 998; these look like non-response codes rather than real measurements, so
# the means/sds reported for those two columns should be confirmed before use.
describe(Salary.df)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45
# Compact listing of every column with its storage type and first values.
str(object = Salary.df)
## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...
# Five-number summary plus mean for every column of the data frame.
summary(object = Salary.df)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0

Boxplots for different variables

# Box-and-whisker plots (lattice::bwplot), one per column, to inspect the
# spread and outliers of each variable individually.
library(lattice)
# Age of the candidate.
bwplot(Salary.df$age)

# Spring and fall grade averages.
bwplot(Salary.df$s_avg)

bwplot(Salary.df$f_avg)

# Years of work experience.
bwplot(Salary.df$work_yrs)

# GMAT total score followed by the three GMAT percentile columns.
bwplot(Salary.df$gmat_tot)

bwplot(Salary.df$gmat_qpc)

bwplot(Salary.df$gmat_vpc)

bwplot(Salary.df$gmat_tpc)

# NOTE(review): frstlang only takes the values 1 and 2, so a box plot carries
# little information here; a frequency table may be more useful.
bwplot(Salary.df$frstlang)

# NOTE(review): salary (0/998/999) and satis (998) still contain what appear
# to be non-response codes at this point, which will distort these two plots;
# confirm and consider plotting the filtered data instead.
bwplot(Salary.df$salary)

bwplot(Salary.df$satis)

Scatterplots

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
par(mfrow=c(2,2))
# GMAT overall percentile (y axis) against each section percentile (x axis).
# The original call passed gmat_vpc as a second positional argument; in
# scatterplot's formula method that position is `subset`, so the verbal
# percentile values were being used as row indices instead of being plotted.
# Each section percentile now gets its own plot, and the axis labels follow
# the formula (left-hand side = y, right-hand side = x).
scatterplot(gmat_tpc ~ gmat_qpc, data = Salary.df,
            spread = TRUE,
            main = "GMAT total percentile vs GMAT Subject",
            xlab = "Subject percentile (gmat_qpc)",
            ylab = "GMAT percentile")

scatterplot(gmat_tpc ~ gmat_vpc, data = Salary.df,
            spread = TRUE,
            main = "GMAT total percentile vs GMAT Subject",
            xlab = "Subject percentile (gmat_vpc)",
            ylab = "GMAT percentile")

# Quartile ranking (y) plotted against starting salary (x) for all rows.
scatterplot(quarter ~ salary,
            data = Salary.df,
            main = "Starting salaries vs Quartile ranking",
            xlab = "Starting salaries",
            ylab = "Quartile ranking",
            spread = TRUE)

# GMAT overall percentile (y) plotted against starting salary (x) for all rows.
scatterplot(gmat_tpc ~ salary,
            data = Salary.df,
            main = "GMAT percentile vs starting salaries",
            xlab = "Starting salaries",
            ylab = "GMAT percentile",
            spread = TRUE)

# Pairwise scatterplots of GMAT total, the two grade averages and salary.
scatterplotMatrix(
  Salary.df[c("gmat_tot", "s_avg", "f_avg", "salary")],
  main = "MBA Starting Salary",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Corrgram

library(corrgram)
# Correlogram of every column: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal; order = TRUE lets corrgram
# reorder the variables.
corrgram(
  Salary.df,
  main = "MBA starting salaries analysis",
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)

# Variance-covariance matrix across all columns of the data frame.
cov(x = Salary.df)
##                    age           sex      gmat_tot      gmat_qpc
## age       1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex      -4.513248e-02  1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00  3.310688e+03  6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00  6.200233e+02  2.210731e+02
## gmat_vpc -2.763643e+00  5.463758e-01  7.260006e+02  3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02  6.839911e+02  1.357997e+02
## s_avg     2.116874e-01  2.096227e-02  2.480257e+00 -1.691233e-01
## f_avg    -3.399348e-02  2.082698e-02  3.154688e+00  5.753854e-01
## quarter  -2.045935e-01 -6.414267e-02 -5.891153e+00  6.001979e-01
## work_yrs  1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang  6.796610e-02  2.138980e-04 -2.499933e+00  6.646346e-01
## salary   -1.183042e+04  1.518264e+03 -1.611600e+05 -3.335823e+04
## satis    -1.763499e+02 -8.780808e+00  1.765263e+03  3.348371e+02
##               gmat_vpc     gmat_tpc         s_avg        f_avg
## age         -2.7636427   -8.8399775    0.21168739  -0.03399348
## sex          0.5463758   -0.0490896    0.02096227   0.02082698
## gmat_tot   726.0006417  683.9910698    2.48025721   3.15468838
## gmat_qpc    38.1482581  135.7996845   -0.16912329   0.57538542
## gmat_vpc   284.2481217  157.4932488    1.31357023   0.67207000
## gmat_tpc   157.4932488  196.6057057    0.62710008   0.58698618
## s_avg        1.3135702    0.6271001    0.14521760   0.11016898
## f_avg        0.6720700    0.5869862    0.11016898   0.27567237
## quarter     -3.2676666   -1.2923719   -0.32237213  -0.26080880
## work_yrs    -3.6181653   -7.8575172    0.15926392  -0.06628700
## frstlang    -2.1145691   -0.4663244   -0.01671372  -0.00626026
## salary   -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis      392.3562739  484.2466779   -4.62884495   2.12532927
##                quarter      work_yrs      frstlang        salary
## age      -2.045935e-01   10.29493864  6.796610e-02 -1.183042e+04
## sex      -6.414267e-02   -0.01580172  2.138980e-04  1.518264e+03
## gmat_tot -5.891153e+00  -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc  6.001979e-01  -11.37186171  6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00   -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00   -7.85751718 -4.663244e-01  3.522750e+03
## s_avg    -3.223721e-01    0.15926392 -1.671372e-02  2.831601e+03
## f_avg    -2.608088e-01   -0.06628700 -6.260260e-03  7.876560e+02
## quarter   1.232119e+00   -0.30866822  3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01   10.44882490 -2.898318e-02  1.486147e+03
## frstlang  3.553381e-02   -0.02898318  1.035266e-01 -1.419586e+03
## salary   -9.296214e+03 1486.14704152 -1.419586e+03  2.596062e+09
## satis    -5.227133e-03 -131.24080907  9.484532e+00 -6.347115e+06
##                  satis
## age      -1.763499e+02
## sex      -8.780808e+00
## gmat_tot  1.765263e+03
## gmat_qpc  3.348371e+02
## gmat_vpc  3.923563e+02
## gmat_tpc  4.842467e+02
## s_avg    -4.628845e+00
## f_avg     2.125329e+00
## quarter  -5.227133e-03
## work_yrs -1.312408e+02
## frstlang  9.484532e+00
## salary   -6.347115e+06
## satis     1.380974e+05

Analysis of people who got a job

# Keep only the rows with an actual reported salary, i.e. drop the rows whose
# salary is 0, 998 or 999. The comparison is done numerically (the column is
# an integer) rather than against string literals, and the explicit is.na()
# guard keeps the previous behaviour of excluding missing salaries.
Job.df <- Salary.df[
  !is.na(Salary.df$salary) & !(Salary.df$salary %in% c(0, 998, 999)),
]
head(Job.df)
##    age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35  22   2      660       90       92       94   3.5  3.75       1
## 36  27   2      700       94       98       98   3.3  3.25       1
## 37  25   2      680       87       96       96   3.5  2.67       1
## 38  25   2      650       82       91       93   3.4  3.25       1
## 39  27   1      710       96       96       98   3.3  3.50       1
## 40  28   2      620       52       98       87   3.4  3.75       1
##    work_yrs frstlang salary satis
## 35        1        1  85000     5
## 36        2        1  85000     6
## 37        2        1  86000     5
## 38        3        1  88000     7
## 39        2        1  92000     6
## 40        5        1  93000     5

Different factors affecting Salary

# Counts of placed candidates for every salary value (rows) by age (columns).
xtabs(~ salary + age, data = Job.df)
##         age
## salary   22 23 24 25 26 27 28 29 30 31 32 33 34 39 40
##   64000   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   77000   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   78256   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   85000   1  0  0  1  1  1  0  0  0  0  0  0  0  0  0
##   86000   0  0  0  1  1  0  0  0  0  0  0  0  0  0  0
##   88000   0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   88500   0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   90000   0  0  0  2  0  1  0  0  0  0  0  0  0  0  0
##   92000   0  0  0  2  0  1  0  0  0  0  0  0  0  0  0
##   93000   0  0  0  1  0  0  1  0  0  1  0  0  0  0  0
##   95000   0  0  1  5  0  0  0  1  0  0  0  0  0  0  0
##   96000   0  0  1  1  2  0  0  0  0  0  0  0  0  0  0
##   96500   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  0  0  1  1  0  0  0  0  0  0  0  0
##   98000   0  1  3  2  1  1  1  1  0  0  0  0  0  0  0
##   99000   0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
##   100000  0  1  4  1  1  1  0  0  0  1  0  0  0  0  0
##   100400  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   101000  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   101600  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0
##   103000  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0
##   105000  0  1  1  2  3  1  0  0  1  1  0  0  1  0  0
##   106000  0  0  0  0  0  0  0  1  2  0  0  0  0  0  0
##   107000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   107300  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
##   107500  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0
##   110000  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
##   112000  0  0  1  0  0  0  0  1  0  0  0  0  0  1  0
##   115000  0  0  1  1  0  3  0  0  0  0  0  0  0  0  0
##   118000  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0
##   120000  0  0  0  0  0  1  1  0  2  0  0  0  0  0  0
##   126710  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   145800  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
##   162000  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   220000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
# Candidates in the 24-30 age group were placed more often than the others.
# Counts of placed candidates for every salary value (rows) by sex (columns).
xtabs(~ salary + sex, data = Job.df)
##         sex
## salary    1  2
##   64000   0  1
##   77000   1  0
##   78256   0  1
##   82000   0  1
##   85000   1  3
##   86000   0  2
##   88000   0  1
##   88500   1  0
##   90000   3  0
##   92000   2  1
##   93000   2  1
##   95000   4  3
##   96000   3  1
##   96500   1  0
##   97000   2  0
##   98000   6  4
##   99000   0  1
##   100000  4  5
##   100400  1  0
##   101000  0  2
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  2  0
##   105000 11  0
##   106000  2  1
##   107000  1  0
##   107300  1  0
##   107500  1  0
##   108000  2  0
##   110000  0  1
##   112000  3  0
##   115000  5  0
##   118000  1  0
##   120000  3  1
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1
# From this table we see that most of the higher starting salaries have been awarded to men.
# Counts of placed candidates for every salary value (rows) by GMAT total
# score (columns).
xtabs(~ salary + gmat_tot, data = Job.df)
##         gmat_tot
## salary   500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
##   64000    0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
##   77000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   78256    0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   85000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   86000    0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   88000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   88500    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   90000    0   0   0   0   0   0   0   1   0   0   0   0   1   0   1   0
##   92000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   93000    0   0   0   1   0   0   0   0   0   0   1   1   0   0   0   0
##   95000    0   0   1   0   0   2   0   0   0   0   2   0   0   0   0   0
##   96000    0   0   0   0   0   1   0   0   1   1   0   0   0   0   1   0
##   96500    1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   1   0   0   0   1   0   0   0   0
##   98000    0   0   0   0   0   1   3   1   1   0   1   0   0   0   0   0
##   99000    0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
##   100000   0   0   0   0   0   2   0   1   0   1   1   0   1   0   2   0
##   100400   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   101000   0   0   0   0   0   0   0   0   0   1   0   1   0   0   0   0
##   101100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   101600   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   102500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   104000   0   0   1   0   0   1   0   0   0   0   0   0   0   0   0   0
##   105000   0   0   0   0   2   0   2   3   0   1   0   1   0   0   1   0
##   106000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   107000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   107300   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   107500   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   108000   0   0   0   0   0   0   1   0   0   1   0   0   0   0   0   0
##   110000   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   112000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   115000   0   0   0   1   0   0   1   0   0   0   0   1   1   0   0   0
##   118000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   120000   0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0
##   126710   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
##   130000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   145800   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   146000   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   162000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   220000   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##         gmat_tot
## salary   670 680 700 710 720
##   64000    0   0   0   0   0
##   77000    0   0   0   0   0
##   78256    0   0   0   0   0
##   82000    1   0   0   0   0
##   85000    0   0   1   0   1
##   86000    0   1   0   0   0
##   88000    0   0   0   0   0
##   88500    0   0   0   0   0
##   90000    0   0   0   0   0
##   92000    0   0   0   1   0
##   93000    0   0   0   0   0
##   95000    2   0   0   0   0
##   96000    0   0   0   0   0
##   96500    0   0   0   0   0
##   97000    0   0   0   0   0
##   98000    1   1   0   1   0
##   99000    0   0   0   0   0
##   100000   0   0   0   1   0
##   100400   0   0   0   0   0
##   101000   0   0   0   0   0
##   101100   0   0   0   0   0
##   101600   0   0   0   0   0
##   102500   1   0   0   0   0
##   103000   0   0   0   0   0
##   104000   0   0   0   0   0
##   105000   0   1   0   0   0
##   106000   0   2   0   0   0
##   107000   0   0   0   0   0
##   107300   0   0   0   0   0
##   107500   0   0   0   0   0
##   108000   0   0   0   0   0
##   110000   0   0   0   0   0
##   112000   1   1   0   0   0
##   115000   0   0   0   1   0
##   118000   0   0   0   0   0
##   120000   1   0   1   0   0
##   126710   0   0   0   0   0
##   130000   0   0   0   0   0
##   145800   0   0   0   0   0
##   146000   0   0   0   0   0
##   162000   0   0   1   0   0
##   220000   0   0   0   0   0
#"Students with gmat_tot>=560 are "
# Counts of placed candidates for every salary value (rows) by spring
# average (columns).
xtabs(~ salary + s_avg, data = Job.df)
##         s_avg
## salary   2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 2.91 3 3.09 3.1 3.2 3.27 3.3 3.4
##   64000    0   0   0   0   0   0   0   0    0 0    0   0   0    0   0   0
##   77000    0   0   0   1   0   0   0   0    0 0    0   0   0    0   0   0
##   78256    0   0   0   0   0   0   0   1    0 0    0   0   0    0   0   0
##   82000    0   0   0   0   0   0   0   0    0 0    0   1   0    0   0   0
##   85000    0   0   1   0   0   0   0   0    0 0    0   0   0    0   1   0
##   86000    0   0   0   0   0   0   0   1    0 0    0   0   0    0   0   0
##   88000    0   0   0   0   0   0   0   0    0 0    0   0   0    0   0   1
##   88500    0   0   0   0   0   1   0   0    0 0    0   0   0    0   0   0
##   90000    0   0   1   0   0   1   0   1    0 0    0   0   0    0   0   0
##   92000    0   0   0   0   0   0   0   0    0 0    0   1   0    0   1   1
##   93000    0   0   0   0   0   1   0   0    0 0    0   1   0    0   0   1
##   95000    0   0   1   0   0   0   0   1    0 0    0   0   1    1   2   0
##   96000    0   0   0   1   0   0   0   0    0 0    0   0   1    0   2   0
##   96500    0   0   0   0   0   0   0   0    0 1    0   0   0    0   0   0
##   97000    0   0   0   0   0   0   1   1    0 0    0   0   0    0   0   0
##   98000    0   1   0   0   0   1   1   4    0 1    0   0   2    0   0   0
##   99000    0   0   0   0   0   0   0   0    0 0    0   1   0    0   0   0
##   100000   0   0   0   0   2   0   1   1    0 1    1   0   0    0   1   2
##   100400   0   0   0   0   1   0   0   0    0 0    0   0   0    0   0   0
##   101000   0   0   0   0   0   0   1   0    0 0    0   1   0    0   0   0
##   101100   0   0   0   0   0   0   1   0    0 0    0   0   0    0   0   0
##   101600   0   0   0   0   1   0   0   0    0 0    0   0   0    0   0   0
##   102500   0   0   0   0   0   0   1   0    0 0    0   0   0    0   0   0
##   103000   0   0   0   0   0   0   0   0    0 0    0   0   1    0   0   0
##   104000   0   0   1   0   0   0   0   0    0 0    0   0   1    0   0   0
##   105000   1   0   0   0   0   0   0   0    1 2    0   0   1    0   2   0
##   106000   0   0   0   0   0   0   0   1    0 0    0   0   0    0   0   0
##   107000   0   0   0   0   0   0   0   0    0 0    1   0   0    0   0   0
##   107300   0   0   0   0   0   0   0   1    0 0    0   0   0    0   0   0
##   107500   0   0   0   0   0   0   0   0    0 0    0   0   0    0   1   0
##   108000   0   0   0   0   0   0   0   1    0 0    0   0   0    0   0   0
##   110000   0   0   0   0   0   0   0   0    0 0    0   0   0    0   0   0
##   112000   0   0   0   0   0   0   1   0    0 0    0   1   0    0   0   0
##   115000   0   0   0   0   1   0   0   0    0 1    0   0   1    0   0   0
##   118000   0   0   0   0   0   0   0   0    0 0    0   0   0    0   0   0
##   120000   0   0   0   0   0   0   0   0    0 0    0   0   0    0   1   0
##   126710   0   0   0   0   1   0   0   0    0 0    0   0   0    0   0   0
##   130000   0   0   0   0   0   0   0   0    0 0    0   0   1    0   0   0
##   145800   0   0   0   0   0   0   0   0    0 0    0   1   0    0   0   0
##   146000   0   0   0   0   0   0   0   0    0 0    0   0   0    0   0   0
##   162000   0   0   0   0   0   0   0   0    0 0    0   0   0    0   0   0
##   220000   0   0   0   1   0   0   0   0    0 0    0   0   0    0   0   0
##         s_avg
## salary   3.45 3.5 3.6 3.7 3.8 4
##   64000     0   1   0   0   0 0
##   77000     0   0   0   0   0 0
##   78256     0   0   0   0   0 0
##   82000     0   0   0   0   0 0
##   85000     0   2   0   0   0 0
##   86000     0   1   0   0   0 0
##   88000     0   0   0   0   0 0
##   88500     0   0   0   0   0 0
##   90000     0   0   0   0   0 0
##   92000     0   0   0   0   0 0
##   93000     0   0   0   0   0 0
##   95000     0   0   1   0   0 0
##   96000     0   0   0   0   0 0
##   96500     0   0   0   0   0 0
##   97000     0   0   0   0   0 0
##   98000     0   0   0   0   0 0
##   99000     0   0   0   0   0 0
##   100000    0   0   0   0   0 0
##   100400    0   0   0   0   0 0
##   101000    0   0   0   0   0 0
##   101100    0   0   0   0   0 0
##   101600    0   0   0   0   0 0
##   102500    0   0   0   0   0 0
##   103000    0   0   0   0   0 0
##   104000    0   0   0   0   0 0
##   105000    1   1   1   0   1 0
##   106000    0   1   0   1   0 0
##   107000    0   0   0   0   0 0
##   107300    0   0   0   0   0 0
##   107500    0   0   0   0   0 0
##   108000    0   1   0   0   0 0
##   110000    0   0   1   0   0 0
##   112000    0   0   1   0   0 0
##   115000    0   0   1   1   0 0
##   118000    0   1   0   0   0 0
##   120000    0   2   0   0   1 0
##   126710    0   0   0   0   0 0
##   130000    0   0   0   0   0 0
##   145800    0   0   0   0   0 0
##   146000    0   0   0   0   0 1
##   162000    0   0   1   0   0 0
##   220000    0   0   0   0   0 0
# Counts of placed candidates for every salary value (rows) by fall
# average (columns).
xtabs(~ salary + f_avg, data = Job.df)
##         f_avg
## salary   0 2 2.25 2.5 2.67 2.75 2.83 3 3.25 3.33 3.5 3.6 3.67 3.75 4
##   64000  0 0    0   0    0    0    0 0    1    0   0   0    0    0 0
##   77000  0 0    0   0    0    0    0 1    0    0   0   0    0    0 0
##   78256  0 0    0   0    0    1    0 0    0    0   0   0    0    0 0
##   82000  0 0    0   0    0    0    0 0    0    1   0   0    0    0 0
##   85000  0 1    0   0    0    0    0 0    1    0   0   1    0    1 0
##   86000  0 0    0   0    1    0    0 0    1    0   0   0    0    0 0
##   88000  0 0    0   0    0    0    0 0    1    0   0   0    0    0 0
##   88500  0 0    0   0    0    1    0 0    0    0   0   0    0    0 0
##   90000  0 0    1   1    0    0    0 0    1    0   0   0    0    0 0
##   92000  0 0    0   0    0    0    0 0    1    0   2   0    0    0 0
##   93000  0 0    0   0    0    1    0 1    0    0   0   0    0    1 0
##   95000  0 0    0   0    0    1    0 1    2    0   2   0    1    0 0
##   96000  0 0    0   1    0    0    0 0    2    0   1   0    0    0 0
##   96500  0 0    0   0    0    1    0 0    0    0   0   0    0    0 0
##   97000  0 0    0   0    0    1    0 1    0    0   0   0    0    0 0
##   98000  0 0    0   1    0    2    0 2    5    0   0   0    0    0 0
##   99000  0 0    0   0    0    0    0 1    0    0   0   0    0    0 0
##   100000 0 0    0   0    0    1    0 5    1    0   1   0    1    0 0
##   100400 0 0    0   1    0    0    0 0    0    0   0   0    0    0 0
##   101000 0 0    0   0    0    0    0 1    0    0   1   0    0    0 0
##   101100 0 0    0   0    0    0    0 1    0    0   0   0    0    0 0
##   101600 0 0    0   0    0    1    0 0    0    0   0   0    0    0 0
##   102500 0 0    0   0    0    1    0 0    0    0   0   0    0    0 0
##   103000 0 0    0   0    0    0    0 1    0    0   0   0    0    0 0
##   104000 0 0    0   0    0    1    0 0    1    0   0   0    0    0 0
##   105000 0 1    0   0    0    0    1 3    2    0   4   0    0    0 0
##   106000 0 0    0   0    0    0    0 2    0    0   0   1    0    0 0
##   107000 0 0    0   0    0    0    0 0    0    0   1   0    0    0 0
##   107300 0 0    0   0    0    0    0 0    0    0   1   0    0    0 0
##   107500 0 0    0   0    0    0    0 0    1    0   0   0    0    0 0
##   108000 0 0    0   0    0    0    0 1    1    0   0   0    0    0 0
##   110000 0 0    0   0    0    0    0 0    0    0   1   0    0    0 0
##   112000 0 0    0   0    0    1    0 1    0    0   1   0    0    0 0
##   115000 0 0    0   1    0    0    0 1    1    0   1   0    0    0 1
##   118000 0 0    0   0    0    0    0 0    0    0   1   0    0    0 0
##   120000 0 0    0   0    0    0    0 1    2    0   0   0    0    0 1
##   126710 0 0    0   0    0    1    0 0    0    0   0   0    0    0 0
##   130000 0 0    0   0    0    0    0 0    1    0   0   0    0    0 0
##   145800 0 0    0   0    0    0    0 1    0    0   0   0    0    0 0
##   146000 1 0    0   0    0    0    0 0    0    0   0   0    0    0 0
##   162000 0 0    0   0    0    0    0 0    0    0   0   0    0    1 0
##   220000 0 0    0   0    0    1    0 0    0    0   0   0    0    0 0
# Counts of placed candidates for every salary value (rows) by first-language
# code (columns).
xtabs(~ salary + frstlang, data = Job.df)
##         frstlang
## salary    1  2
##   64000   1  0
##   77000   1  0
##   78256   1  0
##   82000   1  0
##   85000   4  0
##   86000   2  0
##   88000   1  0
##   88500   1  0
##   90000   3  0
##   92000   3  0
##   93000   3  0
##   95000   7  0
##   96000   4  0
##   96500   1  0
##   97000   2  0
##   98000   8  2
##   99000   0  1
##   100000  9  0
##   100400  1  0
##   101000  2  0
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  1  1
##   105000 11  0
##   106000  3  0
##   107000  1  0
##   107300  0  1
##   107500  1  0
##   108000  2  0
##   110000  1  0
##   112000  3  0
##   115000  5  0
##   118000  0  1
##   120000  4  0
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1
# Counts of placed candidates for every salary value (rows) by years of work
# experience (columns).
xtabs(~ salary + work_yrs, data = Job.df)
##         work_yrs
## salary   0 1 2 3 4 5 6 7 8 10 15 16
##   64000  0 0 1 0 0 0 0 0 0  0  0  0
##   77000  0 0 1 0 0 0 0 0 0  0  0  0
##   78256  0 1 0 0 0 0 0 0 0  0  0  0
##   82000  0 1 0 0 0 0 0 0 0  0  0  0
##   85000  0 1 2 1 0 0 0 0 0  0  0  0
##   86000  0 0 1 1 0 0 0 0 0  0  0  0
##   88000  0 0 0 1 0 0 0 0 0  0  0  0
##   88500  0 0 0 1 0 0 0 0 0  0  0  0
##   90000  0 0 2 0 0 1 0 0 0  0  0  0
##   92000  0 0 3 0 0 0 0 0 0  0  0  0
##   93000  0 0 0 0 1 1 0 0 1  0  0  0
##   95000  1 1 2 2 0 1 0 0 0  0  0  0
##   96000  0 1 2 0 1 0 0 0 0  0  0  0
##   96500  0 0 1 0 0 0 0 0 0  0  0  0
##   97000  0 0 0 1 1 0 0 0 0  0  0  0
##   98000  0 0 7 1 1 0 0 1 0  0  0  0
##   99000  0 0 0 0 0 1 0 0 0  0  0  0
##   100000 0 0 6 1 1 0 1 0 0  0  0  0
##   100400 0 0 0 1 0 0 0 0 0  0  0  0
##   101000 0 0 2 0 0 0 0 0 0  0  0  0
##   101100 0 0 0 0 0 0 0 0 1  0  0  0
##   101600 0 0 0 1 0 0 0 0 0  0  0  0
##   102500 0 0 0 0 0 0 1 0 0  0  0  0
##   103000 0 0 0 1 0 0 0 0 0  0  0  0
##   104000 0 0 0 0 2 0 0 0 0  0  0  0
##   105000 0 0 4 4 0 1 1 0 0  0  0  1
##   106000 0 0 0 0 0 0 2 0 1  0  0  0
##   107000 0 0 1 0 0 0 0 0 0  0  0  0
##   107300 0 0 1 0 0 0 0 0 0  0  0  0
##   107500 0 0 0 1 0 0 0 0 0  0  0  0
##   108000 0 0 0 1 1 0 0 0 0  0  0  0
##   110000 0 0 0 0 0 0 1 0 0  0  0  0
##   112000 0 0 1 0 0 0 1 0 0  0  0  1
##   115000 0 2 0 1 2 0 0 0 0  0  0  0
##   118000 0 0 0 0 0 0 0 0 0  1  0  0
##   120000 0 0 0 1 0 2 0 0 1  0  0  0
##   126710 0 0 0 1 0 0 0 0 0  0  0  0
##   130000 0 0 0 0 1 0 0 0 0  0  0  0
##   145800 0 0 1 0 0 0 0 0 0  0  0  0
##   146000 0 0 0 0 0 0 0 0 0  0  1  0
##   162000 0 1 0 0 0 0 0 0 0  0  0  0
##   220000 0 0 0 0 0 0 0 0 0  0  1  0
# Counts of placed candidates for every salary value (rows) by satisfaction
# score (columns).
xtabs(~ salary + satis, data = Job.df)
##         satis
## salary   3 4 5 6 7
##   64000  0 0 0 0 1
##   77000  0 0 0 1 0
##   78256  0 0 1 0 0
##   82000  0 0 0 0 1
##   85000  0 0 1 3 0
##   86000  0 0 2 0 0
##   88000  0 0 0 0 1
##   88500  0 0 0 1 0
##   90000  0 0 2 0 1
##   92000  0 0 1 1 1
##   93000  0 0 1 2 0
##   95000  1 1 1 2 2
##   96000  0 0 1 1 2
##   96500  0 0 0 1 0
##   97000  0 0 0 1 1
##   98000  0 0 2 5 3
##   99000  0 0 0 1 0
##   100000 0 0 1 6 2
##   100400 0 0 0 0 1
##   101000 0 0 1 1 0
##   101100 0 0 0 1 0
##   101600 0 0 0 1 0
##   102500 0 0 1 0 0
##   103000 0 0 0 1 0
##   104000 0 0 1 1 0
##   105000 0 0 4 6 1
##   106000 0 0 0 2 1
##   107000 0 0 1 0 0
##   107300 0 0 0 0 1
##   107500 0 0 1 0 0
##   108000 0 0 0 2 0
##   110000 0 0 1 0 0
##   112000 0 0 0 2 1
##   115000 0 0 3 2 0
##   118000 0 0 0 0 1
##   120000 0 0 2 2 0
##   126710 0 0 0 1 0
##   130000 0 0 0 0 1
##   145800 0 0 0 1 0
##   146000 0 0 0 1 0
##   162000 0 0 1 0 0
##   220000 0 0 0 1 0
Adding a variable “GotPlaced”
# Flag each candidate as placed when the recorded salary exceeds 1000; the
# values 0, 998 and 999 seen in the salary column all fall below that cutoff.
Salary.df[["GotPlaced"]] <- Salary.df[["salary"]] > 1000
View(Salary.df)

# Store the flag as a factor so it can be used as a grouping variable,
# then re-check the structure of the data frame.
Salary.df[["GotPlaced"]] <- factor(Salary.df[["GotPlaced"]])
str(Salary.df)
## 'data.frame':    274 obs. of  14 variables:
##  $ age      : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex      : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot : int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc : int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc : int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc : int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg    : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg    : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs : int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang : int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary   : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis    : int  7 6 6 7 5 6 5 6 4 998 ...
##  $ GotPlaced: Factor w/ 2 levels "FALSE","TRUE": 1 1 1 1 1 1 1 1 1 1 ...

Contingency Table

# Number of placed (TRUE) and not-placed (FALSE) candidates.
placed <- with(Salary.df, table(GotPlaced == "TRUE"))
print(placed)
## 
## FALSE  TRUE 
##   171   103
# Placement status (rows) cross-tabulated against sex (columns).
placed_Sexwise <- xtabs(~ GotPlaced + sex, data = Salary.df)
print(placed_Sexwise)
##          sex
## GotPlaced   1   2
##     FALSE 134  37
##     TRUE   72  31
# Placement status (rows) cross-tabulated against the first-language code (columns)
placed_Language <- xtabs(formula = ~ GotPlaced + frstlang, data = Salary.df)
print(placed_Language)
##          frstlang
## GotPlaced   1   2
##     FALSE 146  25
##     TRUE   96   7

CHI-SQUARE TEST

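H1: The percentage of females who got placed is higher than the percentage of males who got placed.

Chi-square test of independence between placement status and sex. Note that this test is two-sided: it checks whether the placement rate differs between females and males, not which group's rate is higher.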

# Pearson chi-squared test of independence on the 2x2 GotPlaced-by-sex table.
# NOTE(review): this test is not directional, so on its own it cannot support a
# "higher than" claim; it only tests whether placement and sex are associated.
chisq.test(placed_Sexwise)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  placed_Sexwise
## X-squared = 2.033, df = 1, p-value = 0.1539

H2: The percentage of people placed whose first language is English is higher than the percentage of people placed whose first language is not English

# Pearson chi-squared test of independence on the 2x2 GotPlaced-by-frstlang table.
# NOTE(review): this test is not directional, so on its own it cannot support a
# "higher than" claim; it only tests whether placement and first language are
# associated.
chisq.test(placed_Language)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  placed_Language
## X-squared = 3.0938, df = 1, p-value = 0.07859

T-TEST

# NOTE(review): this is an unpaired Welch test of mean(age) against
# mean(salary), i.e. two different variables on different scales, so the tiny
# p-value only reflects that ages are much smaller numbers than salaries. To ask
# whether salary is related to age, a correlation test (cor.test) or a
# regression of salary on age would be the appropriate tool.
t.test(Job.df$age,Job.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  Job.df$age and Job.df$salary
## t = -58.503, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106496.23  -99511.69
## sample estimates:
##   mean of x   mean of y 
##     26.7767 103030.7379
# NOTE(review): this compares the mean of the sex code (1/2) with the mean
# salary, which is not a meaningful comparison. To compare salaries between
# the two sex groups, a grouped test such as t.test(salary ~ sex, data = Job.df)
# would be needed.
t.test(Job.df$sex,Job.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  Job.df$sex and Job.df$salary
## t = -58.517, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106521.71  -99537.17
## sample estimates:
##    mean of x    mean of y 
## 1.300971e+00 1.030307e+05
# NOTE(review): this compares mean(gmat_tot) with mean(salary), two unrelated
# scales, so the result says nothing about whether GMAT score is associated with
# salary. A correlation test or regression of salary on gmat_tot would address
# that question.
t.test(Job.df$gmat_tot,Job.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  Job.df$gmat_tot and Job.df$salary
## t = -58.168, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -105907.00  -98922.43
## sample estimates:
##   mean of x   mean of y 
##    616.0194 103030.7379
# NOTE(review): this compares mean(satis) with mean(salary), two unrelated
# scales. To relate satisfaction to salary, use a correlation test or a
# regression of salary on satis instead.
t.test(Job.df$satis,Job.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  Job.df$satis and Job.df$salary
## t = -58.515, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106517.13  -99532.58
## sample estimates:
##    mean of x    mean of y 
## 5.883495e+00 1.030307e+05
# NOTE(review): this compares the mean of the first-language code (1/2) with the
# mean salary. To compare salaries between the two language groups, a grouped
# test such as t.test(salary ~ frstlang, data = Job.df) would be needed.
t.test(Job.df$frstlang,Job.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  Job.df$frstlang and Job.df$salary
## t = -58.517, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106521.9  -99537.4
## sample estimates:
##    mean of x    mean of y 
## 1.067961e+00 1.030307e+05

MODEL SELECTION

library(corrplot)
## corrplot 0.84 loaded
# List the column names available in Job.df before selecting predictors.
names(Job.df)
##  [1] "age"      "sex"      "gmat_tot" "gmat_qpc" "gmat_vpc" "gmat_tpc"
##  [7] "s_avg"    "f_avg"    "quarter"  "work_yrs" "frstlang" "salary"  
## [13] "satis"
# Numeric columns of Job.df (defined earlier in the file) used for the
# correlation screen in this MODEL SELECTION section.
dataColumns <- Job.df[, c("age", "work_yrs", "gmat_tot", "gmat_qpc", "gmat_vpc",
                          "gmat_tpc", "s_avg", "f_avg", "quarter", "satis")]

# Pairwise correlations (cor() default method: Pearson), computed once and
# reused for both the plot and the rounded table below.
N <- cor(dataColumns)
corrplot(N, method = "circle")

# `res` is kept as a separate name for compatibility; it is the same matrix as N.
res <- N
round(res, 2)
##            age work_yrs gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg
## age       1.00     0.88    -0.08    -0.17     0.02    -0.10  0.16 -0.22
## work_yrs  0.88     1.00    -0.12    -0.18    -0.03    -0.13  0.16 -0.22
## gmat_tot -0.08    -0.12     1.00     0.67     0.78     0.97  0.17  0.12
## gmat_qpc -0.17    -0.18     0.67     1.00     0.09     0.66  0.02  0.10
## gmat_vpc  0.02    -0.03     0.78     0.09     1.00     0.78  0.16  0.02
## gmat_tpc -0.10    -0.13     0.97     0.66     0.78     1.00  0.14  0.07
## s_avg     0.16     0.16     0.17     0.02     0.16     0.14  1.00  0.45
## f_avg    -0.22    -0.22     0.12     0.10     0.02     0.07  0.45  1.00
## quarter  -0.13    -0.13    -0.11     0.01    -0.13    -0.10 -0.84 -0.43
## satis     0.11     0.06     0.06     0.00     0.15     0.12 -0.14 -0.12
##          quarter satis
## age        -0.13  0.11
## work_yrs   -0.13  0.06
## gmat_tot   -0.11  0.06
## gmat_qpc    0.01  0.00
## gmat_vpc   -0.13  0.15
## gmat_tpc   -0.10  0.12
## s_avg      -0.84 -0.14
## f_avg      -0.43 -0.12
## quarter     1.00  0.23
## satis       0.23  1.00

REGRESSION MODEL analysis

# First candidate model: salary regressed on work experience, the two MBA
# averages, GMAT quantitative/verbal percentiles, sex, first language and
# satisfaction.
# NOTE(review): the model is fitted on Salary.df, i.e. all 274 rows. The str()
# output for this data frame shows salary values of 0, 998 and 999 and a satis
# value of 998, which look like placeholder codes rather than real measurements
# (confirm against the data dictionary). If so, they drive the estimates (for
# example the satis coefficient), and the model should be fitted only on rows
# with a real salary, e.g. data = Job.df.
REGModel1 <- salary ~ 
             work_yrs + s_avg + f_avg + gmat_qpc + gmat_vpc + sex + frstlang + satis 
fit<- lm(REGModel1, data = Salary.df)
summary(fit)
## 
## Call:
## lm(formula = REGModel1, data = Salary.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -68686 -44138  -6378  47351 193852 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 16262.819  34125.373   0.477   0.6341    
## work_yrs     -911.138    945.123  -0.964   0.3359    
## s_avg       23615.184   9555.538   2.471   0.0141 *  
## f_avg       -6608.208   6727.160  -0.982   0.3268    
## gmat_qpc      -23.938    212.555  -0.113   0.9104    
## gmat_vpc     -134.658    197.569  -0.682   0.4961    
## sex          4299.530   6928.943   0.621   0.5355    
## frstlang    -9074.381  10139.543  -0.895   0.3716    
## satis         -44.597      7.935  -5.620 4.82e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47950 on 265 degrees of freedom
## Multiple R-squared:  0.1403, Adjusted R-squared:  0.1144 
## F-statistic: 5.407 on 8 and 265 DF,  p-value: 2.569e-06
# Second, reduced candidate model: salary regressed on work experience, sex,
# first language and satisfaction only.
# NOTE(review): like the first model, this is fitted on all 274 rows of
# Salary.df, including salary values 0/998/999 and satis value 998 that look
# like placeholder codes (confirm against the data dictionary); fitting on rows
# with a real salary (e.g. data = Job.df) is probably what is intended.
REGModel2 <- salary ~ 
             work_yrs + 
             sex +
             frstlang +
             satis 
# `fit` is reassigned here, so the first model's fit object is overwritten.
fit <- lm(REGModel2, data = Salary.df)
summary(fit)
## 
## Call:
## lm(formula = REGModel2, data = Salary.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -53048 -46140  -1073  47952 182479 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 51950.743  14083.603   3.689 0.000273 ***
## work_yrs     -445.138    907.603  -0.490 0.624212    
## sex          5955.031   6747.840   0.883 0.378289    
## frstlang    -9695.438   9090.930  -1.066 0.287156    
## satis         -45.340      7.928  -5.719 2.85e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48170 on 269 degrees of freedom
## Multiple R-squared:  0.1194, Adjusted R-squared:  0.1063 
## F-statistic: 9.117 on 4 and 269 DF,  p-value: 6.396e-07

Analysis of people who do not have a job.

 library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Re-read the raw data file into Total.df.
Total.df <- read.csv("MBA Starting Salaries Data.csv")

# Candidates without a reported job salary: the complement of the
# `salary > 1000` rule used for GotPlaced. Filtering on the salary column
# directly, instead of dplyr::setdiff(Total.df, Job.df), keeps duplicate rows
# (setdiff() de-duplicates its result) and does not depend on Job.df having
# exactly the same columns as Total.df.
# NOTE(review): assumes Job.df holds the rows with salary > 1000 -- confirm
# against its definition earlier in the file.
NoJob.df <- Total.df[Total.df$salary <= 1000, ]
head(NoJob.df)
##   age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter work_yrs
## 1  23   2      620       77       87       87   3.4  3.00       1        2
## 2  24   1      610       90       71       87   3.5  4.00       1        2
## 3  24   1      670       99       78       95   3.3  3.25       1        2
## 4  24   1      570       56       81       75   3.3  2.67       1        1
## 5  24   2      710       93       98       98   3.6  3.75       1        2
## 6  24   1      640       82       89       91   3.9  3.75       1        2
##   frstlang salary satis
## 1        1      0     7
## 2        1      0     6
## 3        1      0     6
## 4        1      0     7
## 5        1    999     5
## 6        1      0     6
# Cross-tabulate the salary values in NoJob.df against age.
xtabs(formula = ~ salary + age, data = NoJob.df)
##       age
## salary 22 23 24 25 26 27 28 29 30 31 32 34 35 36 37 39 42 43 48
##    0    1  3 13  9 10 14  6 11  2  2  5  3  3  2  1  1  1  2  1
##    998  0  0  2 15 11 11  4  0  1  2  0  0  0  0  0  0  0  0  0
##    999  0  0  2  6  5  7  3  5  3  2  2  0  0  0  0  0  0  0  0
# Cross-tabulate the salary values in NoJob.df against the sex code.
xtabs(formula = ~ salary + sex, data = NoJob.df)
##       sex
## salary  1  2
##    0   67 23
##    998 37  9
##    999 30  5
# Cross-tabulate the salary values in NoJob.df against total GMAT score.
xtabs(formula = ~ salary + gmat_tot, data = NoJob.df)
##       gmat_tot
## salary 450 460 480 500 510 530 540 550 560 570 580 590 600 610 620 630 640
##    0     1   0   1   0   2   3   3   4   8   7   4   3   3   9   4   5   6
##    998   1   1   0   0   0   0   0   0   3   0   2   2   5   4   0   7   3
##    999   0   0   0   1   0   0   0   1   2   4   1   2   3   0   4   2   2
##       gmat_tot
## salary 650 660 670 680 690 700 710 720 730 740 750 760 790
##    0     5   3   4   3   0   2   4   2   1   1   1   1   0
##    998   2   3   5   2   2   0   1   1   0   2   0   0   0
##    999   2   3   1   1   2   0   1   0   1   1   0   0   1
# Cross-tabulate the salary values in NoJob.df against s_avg.
xtabs(formula = ~ salary + s_avg, data = NoJob.df)
##       s_avg
## salary  2 2.1 2.2 2.3 2.4 2.45 2.5 2.6 2.67 2.7 2.73 2.8 2.82 2.9  3 3.08
##    0    1   2   1   2   2    0   0   1    0   8    0   9    1   9 10    1
##    998  0   0   1   0   1    0   2   2    1   9    1   1    0   4  6    0
##    999  0   0   0   1   2    1   5   2    0   6    0   2    0   3  2    0
##       s_avg
## salary 3.09 3.1 3.17 3.18 3.2 3.25 3.27 3.3 3.38 3.4 3.45 3.5 3.56 3.6
##    0      2   6    1    0   4    1    2   9    1   7    1   2    0   4
##    998    0   3    0    1   2    0    0   2    0   3    1   3    1   0
##    999    0   4    0    0   1    0    0   3    0   1    0   1    0   1
##       s_avg
## salary 3.64 3.8 3.9  4
##    0      1   1   1  0
##    998    0   1   0  1
##    999    0   0   0  0
# Cross-tabulate the salary values in NoJob.df against f_avg.
xtabs(formula = ~ salary + f_avg, data = NoJob.df)
##       f_avg
## salary  0  2 2.25 2.33 2.5 2.67 2.75 2.8  3 3.17 3.2 3.25 3.33 3.4 3.5 3.6
##    0    1  3    2    0   8    1    9   0 24    1   1   18    1   1   7   1
##    998  1  0    1    1   4    0    7   0  9    1   0   11    0   0   3   0
##    999  0  0    1    0   4    1    7   2 10    0   1    4    1   1   2   0
##       f_avg
## salary 3.67 3.75 3.83  4
##    0      1    6    1  4
##    998    1    2    0  5
##    999    0    1    0  0
# Cross-tabulate the salary values in NoJob.df against the first-language code.
xtabs(formula = ~ salary + frstlang, data = NoJob.df)
##       frstlang
## salary  1  2
##    0   82  8
##    998 38  8
##    999 26  9
# Cross-tabulate the salary values in NoJob.df against years of work experience.
xtabs(formula = ~ salary + work_yrs, data = NoJob.df)
##       work_yrs
## salary  0  1  2  3  4  5  6  7  8  9 10 11 12 13 16 18 22
##    0    1 12 22 14  9 12  2  5  2  1  1  2  2  1  1  1  2
##    998  0  2 16 13 12  0  1  1  1  0  0  0  0  0  0  0  0
##    999  1  2  6  8 11  2  2  2  0  1  0  0  0  0  0  0  0
# Cross-tabulate the salary values in NoJob.df against the satisfaction score.
xtabs(formula = ~ salary + satis, data = NoJob.df)
##       satis
## salary  1  2  3  4  5  6  7 998
##    0    0  0  0  4 36 40 10   0
##    998  0  0  0  0  0  0  0  46
##    999  1  1  4 12  9  7  1   0
# NOTE(review): this compares mean(age) with mean(salary), two different
# variables. In NoJob.df the salary column only takes the values 0, 998 and 999
# (see the cross-tabulations above), which look like placeholder codes rather
# than salaries (confirm), so this test has no meaningful interpretation.
t.test(NoJob.df$age,NoJob.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  NoJob.df$age and NoJob.df$salary
## t = -11.644, df = 170.02, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -520.7128 -369.7550
## sample estimates:
## mean of x mean of y 
##   27.7076  472.9415
# NOTE(review): this compares the mean of the sex code (1/2) with the mean of a
# salary column that only contains 0, 998 and 999 in NoJob.df; the result has
# no meaningful interpretation. A contingency-table test on the sex counts
# would be the way to compare these groups.
t.test(NoJob.df$sex,NoJob.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  NoJob.df$sex and NoJob.df$salary
## t = -12.337, df = 170, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -547.2018 -396.2485
## sample estimates:
##  mean of x  mean of y 
##   1.216374 472.941520
# NOTE(review): this compares mean(gmat_tot) with the mean of a salary column
# that only contains 0, 998 and 999 in NoJob.df; the difference in means is an
# artefact of the two scales and has no meaningful interpretation.
t.test(NoJob.df$gmat_tot,NoJob.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  NoJob.df$gmat_tot and NoJob.df$salary
## t = 3.857, df = 175.12, p-value = 0.0001611
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   72.55219 224.60570
## sample estimates:
## mean of x mean of y 
##  621.5205  472.9415
# NOTE(review): this compares mean(satis) with mean(salary). In NoJob.df both
# columns contain the value 998 (46 rows, per the salary-by-satis table above),
# which looks like a placeholder code (confirm), so both means are dominated by
# those codes and the test has no meaningful interpretation.
t.test(NoJob.df$satis,NoJob.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  NoJob.df$satis and NoJob.df$salary
## t = -3.9327, df = 334.87, p-value = 0.0001021
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -300.9328 -100.2602
## sample estimates:
## mean of x mean of y 
##  272.3450  472.9415
# NOTE(review): this compares the mean of the first-language code (1/2) with the
# mean of a salary column that only contains 0, 998 and 999 in NoJob.df; the
# result has no meaningful interpretation. A contingency-table test on the
# frstlang counts would be the way to compare these groups.
t.test(NoJob.df$frstlang,NoJob.df$salary)
## 
##  Welch Two Sample t-test
## 
## data:  NoJob.df$frstlang and NoJob.df$salary
## t = -12.339, df = 170, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -547.2720 -396.3186
## sample estimates:
##  mean of x  mean of y 
##   1.146199 472.941520

Conclusion

We have analysed most aspects related to the salaries of the MBA graduates, as well as the different factors and their correlations.