Read Data

# Load the survey data; the CSV is expected in the current working directory.
mba.df <- read.csv("MBA Starting Salaries Data.csv")
# View() opens an interactive data viewer, so only call it from an interactive
# session; it serves no purpose when the script is sourced or knitted.
if (interactive()) {
  View(mba.df)
}

Summary Statistics

# psych supplies describe(), which prints one row of summary statistics per
# column of mba.df (n, mean, sd, median, trimmed mean, mad, min, max, range,
# skew, kurtosis and standard error).
library(psych)
describe(mba.df)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

AGE

# Two panels side by side: a boxplot of age, then a bar chart of how many
# respondents have each age.
par(mfrow = c(1, 2))
boxplot(
  mba.df$age,
  col = "lightblue",
  ylab = "age",
  main = "Boxplot of Age"
)
age <- table(mba.df$age)
barplot(age, ylab = "Count", xlab = "Ages", main = "AGE")

SEX

# Recode sex from its numeric codes (1, 2) into a labelled factor.
mba.df$sex <- factor(mba.df$sex, levels = c(1, 2), labels = c("Male", "Female"))
# Plotting a factor draws a bar chart of the level counts.
plot(mba.df$sex, main = "GENDER DISTRIBUTION", col = "lightgreen")

GMAT TOTAL SCORES

# Histogram and boxplot of the total GMAT score, side by side.
par(mfrow = c(1, 2))
hist(
  mba.df$gmat_tot,
  col = "green",
  xlab = "gmat scores",
  main = " GMAT TOTAL SCORE"
)
boxplot(mba.df$gmat_tot, col = "green", main = "BOXPLOT GMAT TOTAL")

WORK EXPERIENCE

# Distribution of years of work experience (20 breaks requested).
hist(
  mba.df$work_yrs,
  breaks = 20,
  main = "Work Experience",
  xlab = "No. of years of work experience",
  col = "pink"
)

LANGUAGE

# Recode first language from its numeric codes (1, 2) into a labelled factor,
# then draw a bar chart of the level counts.
mba.df$frstlang <- factor(
  mba.df$frstlang,
  labels = c("English", "Others"),
  levels = c(1, 2)
)
plot(mba.df$frstlang)

SATISFACTION LEVEL

# Keep only the rows with a satisfaction rating of 7 or less. The comparison is
# made against the number 7, not the string '7': comparing a numeric column
# with a character value makes R compare both sides as text, which is not a
# numeric ordering in general.
newlevel <- mba.df[which(mba.df$satis <= 7), ]
hist(
  newlevel$satis,
  breaks = 5,
  col = "green",
  xlab = "Degree of Satisfaction (1=low,7=high)",
  main = "Satisfaction  distribution"
)

STARTING SALARIES

# Drop the rows whose salary holds the codes 998 or 999; rows with salary 0
# are kept. NOTE(review): 998/999 look like "not reported" placeholders -
# confirm against the data dictionary.
# The codes are compared as numbers: comparing the numeric salary column with
# the strings "998"/"999" would force a text comparison.
job.df <- mba.df[which(mba.df$salary != 998 & mba.df$salary != 999), ]

hist(
  job.df$salary,
  breaks = 5,
  col = "purple",
  xlab = "starting salary",
  main = "Salary  distribution"
)

SCATTERPLOTS SALARY VS AGE

# Back to a single plotting panel; car supplies scatterplot().
par(mfrow = c(1, 1))
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Salary against age for the rows in job.df.
# NOTE(review): spread / smoother.args look like car 2.x-era arguments -
# confirm they are still honoured by the installed car version.
scatterplot(
  salary ~ age,
  data = job.df,
  main = "Scatter plot of salary vs age",
  xlab = "age",
  ylab = "salary",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

SALARY VS SEX

# Single plotting panel, a large scipen so numbers print in fixed rather than
# scientific notation, and ggplot2 attached.
par(mfrow = c(1, 1))
options(scipen = 999)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# job.df is a data frame built earlier in this script, not a dataset shipped
# with ggplot2, so the former data("job.df", package = "ggplot2") call only
# raised a "data set 'job.df' not found" warning and has been removed.
gg <- ggplot(job.df, aes(x = sex, y = salary)) +
  geom_boxplot(size = 2) +
  labs(title = "SEX VS SALARY", y = "Salary", x = "SEX")
plot(gg)

SALARY VS GMAT TOTAL

options(scipen = 999)
library(ggplot2)
# job.df is a data frame built earlier in this script, not a dataset shipped
# with ggplot2, so the former data("job.df", package = "ggplot2") call only
# raised a "data set 'job.df' not found" warning and has been removed.
# Scatter of salary against total GMAT score with a fitted linear trend line.
ggplot(job.df, aes(x = gmat_tot, y = salary)) +
  geom_point(col = "blue", size = 2) +
  geom_smooth(method = "lm") +
  labs(
    title = "SCATTERPLOT",
    subtitle = "SALARY VS GMAT TOTAL",
    x = "GMAT TOTAL",
    y = "SALARY"
  )

SALARY VS WORKEXPERIENCE

# Salary against years of work experience for the rows in job.df.
library(car)
scatterplot(
  salary ~ work_yrs,
  data = job.df,
  xlab = "Work Experience in Years",
  ylab = "Salary",
  main = "SCATTERPLOT OF SALARY VS WORKEXPERIENCE"
)

SALARY VS LANGUAGE

par(mfrow = c(1, 1))
options(scipen = 999)
library(ggplot2)
# job.df is a data frame built earlier in this script, not a dataset shipped
# with ggplot2, so the former data("job.df", package = "ggplot2") call only
# raised a "data set 'job.df' not found" warning and has been removed.
gg <- ggplot(job.df, aes(x = frstlang, y = salary)) +
  geom_boxplot(size = 2) +
  labs(title = "SALARY VS First Language", y = "Salary", x = "First Language")
plot(gg)

Corrgram

# Correlogram of job.df: shaded cells in the lower triangle, pie glyphs in
# the upper triangle, variable names on the diagonal; order = TRUE lets
# corrgram reorder the variables.
library(corrgram)
corrgram(
  job.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "MBA starting salary analysis Correlogram"
)

Variance - Covariance Matrix

  # Variance-covariance matrix of the numeric columns of job.df. x and y hold
  # the same set of columns, so cov(x, y) is the covariance matrix of that set.
  x <- job.df[, c(
    "age", "gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc",
    "s_avg", "f_avg", "work_yrs", "salary"
  )]
  y <- x
  cov(x, y)
##                      age    gmat_tot     gmat_qpc     gmat_vpc
## age          17.78562176  -29.954933   -14.089729   -0.4564443
## gmat_tot    -29.95493307 3196.950561   636.350928  685.4644322
## gmat_qpc    -14.08972906  636.350928   229.384067   42.7985481
## gmat_vpc     -0.45644430  685.464432    42.798548  259.2695920
## gmat_tpc     -7.51276446  672.465188   141.493307  149.8747571
## s_avg         0.26269133    3.076706     0.109287    1.1636153
## f_avg        -0.07513817    2.969557     1.025241    0.2769703
## work_yrs     13.55880289  -36.222204   -13.484078   -2.4562014
## salary   -29185.28497409 -170.881369 22855.717832 2901.3078044
##               gmat_tpc        s_avg        f_avg        work_yrs
## age         -7.5127645    0.2626913  -0.07513817     13.55880289
## gmat_tot   672.4651878    3.0767055   2.96955689    -36.22220423
## gmat_qpc   141.4933074    0.1092870   1.02524072    -13.48407815
## gmat_vpc   149.8747571    1.1636153   0.27697026     -2.45620142
## gmat_tpc   183.0113882    0.9688199   0.77185854     -8.28977763
## s_avg        0.9688199    0.1436561   0.10251263      0.22246519
## f_avg        0.7718585    0.1025126   0.26995964     -0.09189254
## work_yrs    -8.2897776    0.2224652  -0.09189254     13.60378886
## salary   43822.5291991 1940.5276360 244.31568869 -10442.62667314
##                   salary
## age          -29185.2850
## gmat_tot       -170.8814
## gmat_qpc      22855.7178
## gmat_vpc       2901.3078
## gmat_tpc      43822.5292
## s_avg          1940.5276
## f_avg           244.3157
## work_yrs     -10442.6267
## salary   2825177000.1131

subset of the dataset consisting of only those people who actually got a job.

# Keep only the rows with an actual salary figure: drop the codes 998 and 999
# and the zero-salary rows. The codes are compared as numbers; comparing the
# numeric salary column with strings would force a text comparison.
job11.df <- mba.df[
  which(mba.df$salary != 998 & mba.df$salary != 999 & mba.df$salary != 0),
]
job11.df
##     age    sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35   22 Female      660       90       92       94  3.50  3.75       1
## 36   27 Female      700       94       98       98  3.30  3.25       1
## 37   25 Female      680       87       96       96  3.50  2.67       1
## 38   25 Female      650       82       91       93  3.40  3.25       1
## 39   27   Male      710       96       96       98  3.30  3.50       1
## 40   28 Female      620       52       98       87  3.40  3.75       1
## 41   24   Male      670       84       96       95  3.30  3.25       1
## 42   25 Female      560       52       81       72  3.30  3.50       1
## 43   25 Female      530       50       62       61  3.60  3.67       1
## 44   25   Male      650       79       93       93  3.30  3.50       1
## 45   26 Female      590       56       89       81  3.30  3.25       1
## 46   23 Female      650       93       81       93  3.40  3.00       1
## 47   24   Male      560       81       50       71  3.40  3.67       1
## 48   27   Male      610       72       84       86  3.30  3.50       1
## 49   25   Male      650       95       84       93  3.30  3.00       1
## 50   25   Male      550       74       50       68  3.50  3.50       1
## 51   26   Male      570       68       74       75  3.80  3.50       1
## 52   26   Male      580       79       71       78  3.45  3.50       1
## 53   30   Male      600       60       91       83  3.30  3.25       1
## 54   31   Male      570       72       71       75  3.60  3.50       1
## 55   30   Male      620       60       96       87  3.50  3.00       1
## 56   30 Female      680       96       87       96  3.70  3.60       1
## 57   27   Male      630       93       75       91  3.30  3.25       1
## 58   25   Male      600       82       74       83  3.50  3.25       1
## 59   28 Female      640       89       81       91  3.60  3.50       1
## 60   39   Male      600       72       81       83  3.60  3.50       1
## 61   27   Male      570       95       33       75  3.70  4.00       1
## 62   27   Male      710       95       98       98  3.60  3.50       1
## 63   33   Male      620       72       89       87  3.50  3.50       1
## 64   27   Male      600       67       84       83  3.50  3.00       1
## 65   28   Male      700       95       95       98  3.80  4.00       1
## 66   30   Male      600       77       81       84  3.50  3.25       1
## 67   30 Female      670       87       95       95  3.30  3.25       1
## 68   40   Male      630       71       95       91  4.00  0.00       1
## 69   25   Male      700       98       93       98  3.60  3.75       1
## 115  26 Female      670       87       95       95  3.10  3.33       2
## 116  25 Female      620       89       74       87  3.10  3.50       2
## 117  31   Male      540       60       62       65  3.10  3.00       2
## 118  25   Male      670       95       89       95  3.20  3.50       2
## 119  25   Male      610       87       71       86  3.27  3.25       2
## 120  24   Male      560       52       81       72  3.20  3.25       2
## 121  24   Male      500       78       30       52  3.00  2.75       2
## 122  23   Male      590       72       81       81  3.20  3.25       2
## 123  24   Male      570       82       58       75  3.20  3.25       2
## 124  26 Female      570       93       37       75  3.00  2.75       2
## 125  28 Female      580       83       58       79  3.10  3.00       2
## 126  24 Female      580       72       71       78  3.00  3.25       2
## 127  31   Male      560       68       67       72  3.09  3.00       2
## 128  25 Female      620       89       74       87  3.10  3.50       2
## 129  27   Male      620       97       63       88  3.20  3.00       2
## 130  28   Male      560       75       58       72  3.20  3.25       2
## 131  26   Male      680       84       96       96  3.20  3.25       2
## 132  27   Male      620       81       87       89  3.00  3.00       2
## 133  34   Male      550       72       58       69  3.00  3.00       2
## 134  26   Male      600       84       67       83  3.09  3.50       2
## 135  29   Male      670       91       93       95  3.10  3.00       2
## 136  24   Male      620       84       81       87  3.00  3.25       2
## 137  27   Male      630       72       95       89  3.20  3.00       2
## 138  26   Male      650       89       87       93  3.20  3.25       2
## 139  24   Male      620       88       74       87  3.10  3.00       2
## 186  23 Female      520       43       67       58  2.90  2.75       3
## 187  27   Male      620       87       74       87  2.70  2.75       3
## 188  25   Male      580       78       67       80  2.90  3.25       3
## 189  25   Male      630       75       93       89  2.70  2.50       3
## 190  25   Male      610       89       74       87  2.70  2.75       3
## 191  29 Female      560       64       71       72  2.90  3.00       3
## 192  27   Male      620       79       87       88  2.90  2.75       3
## 193  28   Male      580       72       71       78  2.80  3.00       3
## 194  24 Female      670       83       98       96  2.90  3.25       3
## 195  25 Female      560       39       91       72  2.90  3.00       3
## 196  25 Female      580       72       71       78  2.80  3.25       3
## 197  27   Male      680       97       90       97  2.90  2.75       3
## 198  28   Male      610       89       67       86  2.70  3.00       3
## 199  29   Male      710       93       98       99  2.90  3.25       3
## 200  24   Male      710       99       92       99  2.90  3.00       3
## 201  25 Female      630       84       87       89  2.80  2.75       3
## 202  24 Female      600       89       67       85  2.80  3.00       3
## 203  29   Male      660       91       90       95  2.80  3.00       3
## 204  30   Male      670       83       97       96  2.80  2.75       3
## 205  24   Male      580       89       54       78  2.91  2.83       3
## 206  29   Male      680       79       99       96  2.90  3.00       3
## 207  32   Male      660       83       95       94  2.90  3.50       3
## 208  28   Male      570       56       84       75  2.90  3.00       3
## 209  24   Male      680       96       87       97  2.80  2.75       3
## 256  24 Female      560       55       78       71  3.50  3.25       4
## 257  23   Male      660       81       98       95  2.50  3.00       4
## 258  25 Female      720       96       98       99  3.50  3.60       4
## 259  26   Male      620       78       87       89  2.40  2.00       4
## 260  26 Female      630       85       81       90  2.90  3.25       4
## 261  27   Male      650       89       89       93  2.40  2.25       4
## 262  25   Male      660       99       71       95  3.40  3.25       4
## 263  25   Male      610       83       81       86  2.40  2.75       4
## 264  26   Male      600       87       62       83  2.50  2.50       4
## 265  24   Male      570       75       62       75  2.30  2.50       4
## 266  24 Female      600       77       78       84  2.60  3.00       4
## 267  26 Female      650       91       84       93  2.60  3.00       4
## 268  29   Male      630       72       95       89  2.60  2.50       4
## 269  26   Male      630       96       71       91  2.60  2.75       4
## 270  31   Male      530       75       45       62  2.40  2.75       4
## 271  23   Male      580       64       81       78  2.20  2.00       4
## 272  25   Male      540       79       45       65  2.60  2.50       4
## 273  26   Male      550       72       58       69  2.60  2.75       4
## 274  40 Female      500       60       45       51  2.50  2.75       4
##     work_yrs frstlang salary satis
## 35         1  English  85000     5
## 36         2  English  85000     6
## 37         2  English  86000     5
## 38         3  English  88000     7
## 39         2  English  92000     6
## 40         5  English  93000     5
## 41         0  English  95000     4
## 42         1  English  95000     5
## 43         3  English  95000     3
## 44         1  English  96000     7
## 45         4  English  96000     5
## 46         2  English 100000     7
## 47         2  English 100000     6
## 48         6  English 100000     6
## 49         2  English 105000     7
## 50         3  English 105000     6
## 51         3  English 105000     6
## 52         2  English 105000     5
## 53         5  English 105000     6
## 54         6  English 105000     6
## 55         8  English 106000     7
## 56         6  English 106000     6
## 57         3  English 107500     5
## 58         3  English 108000     6
## 59         6  English 110000     5
## 60        16  English 112000     7
## 61         4  English 115000     5
## 62         1  English 115000     5
## 63        10   Others 118000     7
## 64         3  English 120000     5
## 65         5  English 120000     5
## 66         5  English 120000     6
## 67         8  English 120000     6
## 68        15  English 146000     6
## 69         1  English 162000     5
## 115        1  English  82000     7
## 116        2  English  92000     5
## 117        8  English  93000     6
## 118        2  English  95000     6
## 119        3  English  95000     6
## 120        2  English  96000     7
## 121        2  English  96500     6
## 122        2  English  98000     6
## 123        2  English  98000     6
## 124        3   Others  98000     5
## 125        5   Others  99000     6
## 126        2  English 100000     5
## 127        4  English 100000     6
## 128        2  English 101000     5
## 129        3  English 103000     6
## 130        4  English 104000     5
## 131        3  English 105000     6
## 132        3  English 105000     5
## 133       16  English 105000     5
## 134        2  English 107000     5
## 135        6  English 112000     6
## 136        1  English 115000     6
## 137        4  English 115000     6
## 138        4  English 130000     7
## 139        2  English 145800     6
## 186        1  English  78256     5
## 187        3  English  88500     6
## 188        2  English  90000     7
## 189        2  English  90000     5
## 190        4  English  93000     6
## 191        5  English  95000     7
## 192        4  English  97000     7
## 193        3  English  97000     6
## 194        2  English  98000     7
## 195        2  English  98000     7
## 196        2  English  98000     6
## 197        2   Others  98000     6
## 198        4  English  98000     7
## 199        7  English  98000     5
## 200        3  English 100000     6
## 201        2  English 100000     6
## 202        2  English 101000     6
## 203        8  English 101100     6
## 204        6  English 102500     5
## 205        2  English 105000     5
## 206        6  English 106000     6
## 207        2   Others 107300     7
## 208        4  English 108000     6
## 209        2  English 112000     6
## 256        2  English  64000     7
## 257        2  English  77000     6
## 258        3  English  85000     6
## 259        2  English  85000     6
## 260        3  English  86000     5
## 261        5  English  90000     5
## 262        2  English  92000     7
## 263        2  English  95000     7
## 264        2  English  96000     6
## 265        2  English  98000     6
## 266        2  English 100000     6
## 267        2  English 100000     7
## 268        3  English 100400     7
## 269        3  English 101600     6
## 270        4   Others 104000     6
## 271        2  English 105000     6
## 272        3  English 115000     5
## 273        3  English 126710     6
## 274       15   Others 220000     6

Contingency tables showing the effect of various factors on the starting salary

 # Cross-tabulate starting salary against sex for the job11.df subset.
 mytable <- xtabs(~ salary + sex, data = job11.df)
 mytable
##         sex
## salary   Male Female
##   64000     0      1
##   77000     1      0
##   78256     0      1
##   82000     0      1
##   85000     1      3
##   86000     0      2
##   88000     0      1
##   88500     1      0
##   90000     3      0
##   92000     2      1
##   93000     2      1
##   95000     4      3
##   96000     3      1
##   96500     1      0
##   97000     2      0
##   98000     6      4
##   99000     0      1
##   100000    4      5
##   100400    1      0
##   101000    0      2
##   101100    1      0
##   101600    1      0
##   102500    1      0
##   103000    1      0
##   104000    2      0
##   105000   11      0
##   106000    2      1
##   107000    1      0
##   107300    1      0
##   107500    1      0
##   108000    2      0
##   110000    0      1
##   112000    3      0
##   115000    5      0
##   118000    1      0
##   120000    3      1
##   126710    1      0
##   130000    1      0
##   145800    1      0
##   146000    1      0
##   162000    1      0
##   220000    0      1

From this table we see that most higher starting salaries have been awarded to men.

 # Cross-tabulate starting salary against years of work experience.
 mytable1 <- xtabs(~ salary + work_yrs, data = job11.df)
 mytable1
##         work_yrs
## salary   0 1 2 3 4 5 6 7 8 10 15 16
##   64000  0 0 1 0 0 0 0 0 0  0  0  0
##   77000  0 0 1 0 0 0 0 0 0  0  0  0
##   78256  0 1 0 0 0 0 0 0 0  0  0  0
##   82000  0 1 0 0 0 0 0 0 0  0  0  0
##   85000  0 1 2 1 0 0 0 0 0  0  0  0
##   86000  0 0 1 1 0 0 0 0 0  0  0  0
##   88000  0 0 0 1 0 0 0 0 0  0  0  0
##   88500  0 0 0 1 0 0 0 0 0  0  0  0
##   90000  0 0 2 0 0 1 0 0 0  0  0  0
##   92000  0 0 3 0 0 0 0 0 0  0  0  0
##   93000  0 0 0 0 1 1 0 0 1  0  0  0
##   95000  1 1 2 2 0 1 0 0 0  0  0  0
##   96000  0 1 2 0 1 0 0 0 0  0  0  0
##   96500  0 0 1 0 0 0 0 0 0  0  0  0
##   97000  0 0 0 1 1 0 0 0 0  0  0  0
##   98000  0 0 7 1 1 0 0 1 0  0  0  0
##   99000  0 0 0 0 0 1 0 0 0  0  0  0
##   100000 0 0 6 1 1 0 1 0 0  0  0  0
##   100400 0 0 0 1 0 0 0 0 0  0  0  0
##   101000 0 0 2 0 0 0 0 0 0  0  0  0
##   101100 0 0 0 0 0 0 0 0 1  0  0  0
##   101600 0 0 0 1 0 0 0 0 0  0  0  0
##   102500 0 0 0 0 0 0 1 0 0  0  0  0
##   103000 0 0 0 1 0 0 0 0 0  0  0  0
##   104000 0 0 0 0 2 0 0 0 0  0  0  0
##   105000 0 0 4 4 0 1 1 0 0  0  0  1
##   106000 0 0 0 0 0 0 2 0 1  0  0  0
##   107000 0 0 1 0 0 0 0 0 0  0  0  0
##   107300 0 0 1 0 0 0 0 0 0  0  0  0
##   107500 0 0 0 1 0 0 0 0 0  0  0  0
##   108000 0 0 0 1 1 0 0 0 0  0  0  0
##   110000 0 0 0 0 0 0 1 0 0  0  0  0
##   112000 0 0 1 0 0 0 1 0 0  0  0  1
##   115000 0 2 0 1 2 0 0 0 0  0  0  0
##   118000 0 0 0 0 0 0 0 0 0  1  0  0
##   120000 0 0 0 1 0 2 0 0 1  0  0  0
##   126710 0 0 0 1 0 0 0 0 0  0  0  0
##   130000 0 0 0 0 1 0 0 0 0  0  0  0
##   145800 0 0 1 0 0 0 0 0 0  0  0  0
##   146000 0 0 0 0 0 0 0 0 0  0  1  0
##   162000 0 1 0 0 0 0 0 0 0  0  0  0
##   220000 0 0 0 0 0 0 0 0 0  0  1  0

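From the above table we see that most placed graduates had 2 or more years of work experience, although a few were placed with 0 or 1 year, so 2 years is typical rather than strictly necessary.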

 # Cross-tabulate starting salary against first language.
 mytable2 <- xtabs(~ salary + frstlang, data = job11.df)
 mytable2
##         frstlang
## salary   English Others
##   64000        1      0
##   77000        1      0
##   78256        1      0
##   82000        1      0
##   85000        4      0
##   86000        2      0
##   88000        1      0
##   88500        1      0
##   90000        3      0
##   92000        3      0
##   93000        3      0
##   95000        7      0
##   96000        4      0
##   96500        1      0
##   97000        2      0
##   98000        8      2
##   99000        0      1
##   100000       9      0
##   100400       1      0
##   101000       2      0
##   101100       1      0
##   101600       1      0
##   102500       1      0
##   103000       1      0
##   104000       1      1
##   105000      11      0
##   106000       3      0
##   107000       1      0
##   107300       0      1
##   107500       1      0
##   108000       2      0
##   110000       1      0
##   112000       3      0
##   115000       5      0
##   118000       0      1
##   120000       4      0
##   126710       1      0
##   130000       1      0
##   145800       1      0
##   146000       1      0
##   162000       1      0
##   220000       0      1

It is seen that students with English as first language are mostly preferred and get higher salaries and jobs compared to those whose first language is not English.

  # Cross-tabulate starting salary against total GMAT score.
  mytable3 <- xtabs(~ salary + gmat_tot, data = job11.df)
  mytable3
##         gmat_tot
## salary   500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
##   64000    0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
##   77000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   78256    0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   85000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   86000    0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   88000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   88500    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   90000    0   0   0   0   0   0   0   1   0   0   0   0   1   0   1   0
##   92000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   93000    0   0   0   1   0   0   0   0   0   0   1   1   0   0   0   0
##   95000    0   0   1   0   0   2   0   0   0   0   2   0   0   0   0   0
##   96000    0   0   0   0   0   1   0   0   1   1   0   0   0   0   1   0
##   96500    1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   1   0   0   0   1   0   0   0   0
##   98000    0   0   0   0   0   1   3   1   1   0   1   0   0   0   0   0
##   99000    0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
##   100000   0   0   0   0   0   2   0   1   0   1   1   0   1   0   2   0
##   100400   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   101000   0   0   0   0   0   0   0   0   0   1   0   1   0   0   0   0
##   101100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   101600   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   102500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   104000   0   0   1   0   0   1   0   0   0   0   0   0   0   0   0   0
##   105000   0   0   0   0   2   0   2   3   0   1   0   1   0   0   1   0
##   106000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   107000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   107300   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   107500   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   108000   0   0   0   0   0   0   1   0   0   1   0   0   0   0   0   0
##   110000   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   112000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   115000   0   0   0   1   0   0   1   0   0   0   0   1   1   0   0   0
##   118000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   120000   0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0
##   126710   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
##   130000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   145800   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   146000   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   162000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   220000   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##         gmat_tot
## salary   670 680 700 710 720
##   64000    0   0   0   0   0
##   77000    0   0   0   0   0
##   78256    0   0   0   0   0
##   82000    1   0   0   0   0
##   85000    0   0   1   0   1
##   86000    0   1   0   0   0
##   88000    0   0   0   0   0
##   88500    0   0   0   0   0
##   90000    0   0   0   0   0
##   92000    0   0   0   1   0
##   93000    0   0   0   0   0
##   95000    2   0   0   0   0
##   96000    0   0   0   0   0
##   96500    0   0   0   0   0
##   97000    0   0   0   0   0
##   98000    1   1   0   1   0
##   99000    0   0   0   0   0
##   100000   0   0   0   1   0
##   100400   0   0   0   0   0
##   101000   0   0   0   0   0
##   101100   0   0   0   0   0
##   101600   0   0   0   0   0
##   102500   1   0   0   0   0
##   103000   0   0   0   0   0
##   104000   0   0   0   0   0
##   105000   0   1   0   0   0
##   106000   0   2   0   0   0
##   107000   0   0   0   0   0
##   107300   0   0   0   0   0
##   107500   0   0   0   0   0
##   108000   0   0   0   0   0
##   110000   0   0   0   0   0
##   112000   1   1   0   0   0
##   115000   0   0   0   1   0
##   118000   0   0   0   0   0
##   120000   1   0   1   0   0
##   126710   0   0   0   0   0
##   130000   0   0   0   0   0
##   145800   0   0   0   0   0
##   146000   0   0   0   0   0
##   162000   0   0   1   0   0
##   220000   0   0   0   0   0

T Tests

1.SALARY OF MALE AND FEMALE

NULL HYPOTHESIS: There is no difference between the salaries of male and female graduates.

    # Two-sample t-test (equal variances assumed) comparing log salary
    # between the two sex groups of job11.df.
    log.transformed.salary <- log(job11.df$salary)
    t.test(log.transformed.salary ~ job11.df$sex, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  log.transformed.salary by job11.df$sex
## t = 2.4552, df = 101, p-value = 0.01579
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01470674 0.13847594
## sample estimates:
##   mean in group Male mean in group Female 
##             11.55390             11.47731

p-value < 0.05, hence we reject the null hypothesis and conclude that there is a significant difference between the (log) salaries of male and female graduates.

Pearson’s Correlation test

  1. Salary and Workexperience
# Pearson correlation test between salary and years of work experience.
# var.equal / paired are t.test() arguments, not cor.test() arguments; they
# were silently ignored, so they are dropped here without changing the result.
cor.test(job.df$salary, job.df$work_yrs)
## 
##  Pearson's product-moment correlation
## 
## data:  job.df$salary and job.df$work_yrs
## t = -0.73721, df = 191, p-value = 0.4619
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.19305455  0.08864017
## sample estimates:
##         cor 
## -0.05326685

p-value (0.4619) > 0.05. Hence there is no evidence of a linear relationship between Salary and Work Experience in this subset.

  2. Salary And GMAT Score
# Pearson correlation test between salary and total GMAT score.
# var.equal / paired are t.test() arguments, not cor.test() arguments; they
# were silently ignored, so they are dropped here without changing the result.
cor.test(job.df$salary, job.df$gmat_tot)
## 
##  Pearson's product-moment correlation
## 
## data:  job.df$salary and job.df$gmat_tot
## t = -0.00078582, df = 191, p-value = 0.9994
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1412959  0.1411844
## sample estimates:
##            cor 
## -0.00005685962

p-value > 0.05. Hence there is no evidence of a linear relationship between Salary and total GMAT score.

Chi-Squared Test

# Pearson chi-squared test on the salary x first-language table (mytable2).
# The run recorded below warns that the chi-squared approximation may be
# incorrect, so read the p-value with caution.
chisq.test(mytable2)
## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable2
## X-squared = 69.847, df = 41, p-value = 0.003296

Since p<0.01 we can say that there is a relationship between first language and salary

# Pearson chi-squared test on the salary x work-experience table (mytable1).
# The run recorded below warns that the chi-squared approximation may be
# incorrect, so read the p-value with caution.
chisq.test(mytable1)
## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable1
## X-squared = 535.23, df = 451, p-value = 0.003809

Since p<0.01 there is a relationship between work experience and salary

    # Pearson chi-squared test on the salary x total-GMAT table (mytable3).
    # The run recorded below warns that the chi-squared approximation may be
    # incorrect, so read the p-value with caution.
    chisq.test(mytable3)
## Warning in chisq.test(mytable3): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable3
## X-squared = 927.24, df = 820, p-value = 0.005279

Since p<0.01 we see there exists a relationship between Total GMAT score and starting salary.

Regression Model

Model 1

# Rebuild the subset of rows with an actual salary figure (codes 998/999 and
# zero salaries removed), comparing the codes as numbers rather than strings.
job11.df <- mba.df[
  which(mba.df$salary != 998 & mba.df$salary != 999 & mba.df$salary != 0),
]
# Model 1: salary regressed on the four GMAT measures. The formula names bare
# columns resolved through `data`, so the coefficients are labelled by column
# name and predict() can be used with new data.
fit <- lm(salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc, data = job11.df)
summary(fit)
## 
## Call:
## lm(formula = job11.df$salary ~ job11.df$gmat_tot + job11.df$gmat_qpc + 
##     job11.df$gmat_vpc + job11.df$gmat_tpc, data = job11.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -40370  -8250  -2164   5253 100097 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       109539.54   48054.24   2.279   0.0248 *
## job11.df$gmat_tot     55.01     181.71   0.303   0.7627  
## job11.df$gmat_qpc    718.40     541.90   1.326   0.1880  
## job11.df$gmat_vpc    546.10     543.85   1.004   0.3178  
## job11.df$gmat_tpc  -1663.16     801.57  -2.075   0.0406 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17670 on 98 degrees of freedom
## Multiple R-squared:  0.06089,    Adjusted R-squared:  0.02256 
## F-statistic: 1.589 on 4 and 98 DF,  p-value: 0.1834

gmat_tpc is the only significant predictor in Model 1 (p = 0.0406). The multiple R-squared value indicates that the model accounts for about 6% of the variance in salary. The residual standard error (17670) can be thought of as the typical size of the error in predicting salary from the available GMAT data.

Model 2

# Model 2: salary regressed on satisfaction, work experience and first
# language, fitted on job.df. The formula names bare columns resolved through
# `data`, so the coefficients are labelled by column name and predict() can
# be used with new data.
fit1 <- lm(salary ~ satis + work_yrs + frstlang, data = job.df)
summary(fit1)
## 
## Call:
## lm(formula = job.df$salary ~ job.df$satis + job.df$work_yrs + 
##     job.df$frstlang, data = job.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -70468 -53495  25281  45013 165308 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)            -5665.6    29404.9  -0.193   0.8474  
## job.df$satis           10983.1     4970.8   2.210   0.0283 *
## job.df$work_yrs         -748.6     1033.5  -0.724   0.4697  
## job.df$frstlangOthers   5687.9    14333.6   0.397   0.6919  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 52820 on 189 degrees of freedom
## Multiple R-squared:  0.02799,    Adjusted R-squared:  0.01256 
## F-statistic: 1.814 on 3 and 189 DF,  p-value: 0.146

satis is the only significant variable in model 2 (p = 0.028); work_yrs and frstlang are not significant. The multiple R-squared value indicates that the model accounts for about 2.8% of the variance in salary.

Model 3

# Model 3: regress salary on age and sex, using the job.df subset created
# earlier in the file.
#
# Bare column names are resolved through `data`. The previous
# `job.df$salary ~ job.df$age + job.df$sex` form bypassed `data` and made
# predict(fit2, newdata = ...) silently ignore the new data. The estimates are
# identical; only the term labels lose the "job.df$" prefix.
fit2 <- lm(salary ~ age + sex, data = job.df)
summary(fit2)
## 
## Call:
## lm(formula = job.df$salary ~ job.df$age + job.df$sex, data = job.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -63707 -55467  25564  44168 184091 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       99617.1    25493.0   3.908  0.00013 ***
## job.df$age        -1635.2      906.9  -1.803  0.07298 .  
## job.df$sexFemale   1698.4     8498.3   0.200  0.84181    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 52970 on 190 degrees of freedom
## Multiple R-squared:  0.01716,    Adjusted R-squared:  0.006813 
## F-statistic: 1.658 on 2 and 190 DF,  p-value: 0.1932

Age is only marginally significant in model 3 (p = 0.073, i.e. significant at the 10% level but not at the 5% level).

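Comparing R-squared values, model 1 (0.061) is higher than model 2 (0.028) and model 3 (0.017), but model 1 was fitted on a different subset of the data (job11.df), so its R-squared is not directly comparable. Among the models fitted on job.df, model 2 is better than model 3, with a higher R-squared value.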

Regression model

As Model 2 is best fit for us we will consider the following equation.

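$y = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3$, where $y$ is the salary of placed students, $x_1$ is satis, $x_2$ is work_yrs and $x_3$ is frstlang (Others). From the fitted model we get the beta coefficients: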

# Print the fitted coefficients (intercept and slopes) of model 2.
coef(fit1)
##           (Intercept)          job.df$satis       job.df$work_yrs 
##            -5665.5929            10983.1055             -748.6187 
## job.df$frstlangOthers 
##             5687.8724

Above are the beta coefficients. Hence the model is: salary = -5665.59 + 10983.11(satis) - 748.62(work_yrs) + 5687.87(frstlang = Others)

Dataset consisting of people with no job

 # Students without a job are the rows whose salary is recorded as 0.
 # A salary of 0 can never equal 998 or 999, so the single condition selects
 # the same rows; subset() also drops rows where the comparison is NA.
 nojob.df <- subset(mba.df, salary == 0)
 # Preview the first six rows of the unplaced subset.
 head(nojob.df, n = 6L)
##   age    sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 1  23 Female      620       77       87       87   3.4  3.00       1
## 2  24   Male      610       90       71       87   3.5  4.00       1
## 3  24   Male      670       99       78       95   3.3  3.25       1
## 4  24   Male      570       56       81       75   3.3  2.67       1
## 6  24   Male      640       82       89       91   3.9  3.75       1
## 7  25   Male      610       89       74       87   3.4  3.50       1
##   work_yrs frstlang salary satis
## 1        2  English      0     7
## 2        2  English      0     6
## 3        2  English      0     6
## 4        1  English      0     7
## 6        2  English      0     6
## 7        2  English      0     5
# Histogram of total GMAT score for the students without a job.
hist(
  x = nojob.df$gmat_tot,
  breaks = 10,
  col = "orange",
  xlab = "GMAT score",
  main = "GMAT performance of students with no job"
)

GMAT scores are concentrated between 550 and 650 for unplaced students, while they are more scattered among those who do have a job.

chi-Squared Test

    # Pearson chi-squared test of independence between years of work
    # experience and satisfaction rating among the students without a job.
    chisq.test(nojob.df$work_yrs, nojob.df$satis)
## Warning in chisq.test(nojob.df$work_yrs, nojob.df$satis): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob.df$work_yrs and nojob.df$satis
## X-squared = 44.974, df = 48, p-value = 0.5976

Since p > 0.05 we fail to reject the null hypothesis: among unplaced students there is no evidence of a relationship between work experience and satisfaction with the MBA program.

Summary 1. Students' salary was significantly dependent on work experience and language preference