Reading the dataset into R.

# Load the MBA starting-salaries survey into `mbasal`.
# The CSV is addressed by its full path rather than through setwd(), so the
# session's working directory is left untouched.
mbasal <- read.csv(
  file.path("C:/Users/Kalyan/Downloads", "MBA Starting Salaries Data.csv")
)
# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(mbasal)
}

Summarizing the dataset.

# Per-column minimum, quartiles, mean and maximum of the raw survey data.
summary(mbasal)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0
# Show the structure of the raw data: dimensions, column types, first values.
str(mbasal)
## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...
# psych::describe() gives a wider set of descriptive statistics per column
# (n, mean, sd, median, trimmed mean, mad, range, skew, kurtosis, se).
library(psych)
describe(mbasal)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

Since some people didn’t answer the survey or didn’t disclose their salary, we need to eliminate their responses (salary values 998 and 999), along with the rows whose salary is 0.

# Keep only the rows whose salary is none of 998, 999 or 0.
mbaproper <- mbasal[
  which(!(mbasal$salary == 998 | mbasal$salary == 999 | mbasal$salary == 0)),
]

Summarizing the proper dataset after eliminating responses.

# Per-column minimum, quartiles, mean and maximum of the filtered data.
summary(mbaproper)
##       age             sex           gmat_tot      gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :500   Min.   :39.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580   1st Qu.:72.00  
##  Median :26.00   Median :1.000   Median :620   Median :82.00  
##  Mean   :26.78   Mean   :1.301   Mean   :616   Mean   :79.73  
##  3rd Qu.:28.00   3rd Qu.:2.000   3rd Qu.:655   3rd Qu.:89.00  
##  Max.   :40.00   Max.   :2.000   Max.   :720   Max.   :99.00  
##     gmat_vpc        gmat_tpc         s_avg           f_avg      
##  Min.   :30.00   Min.   :51.00   Min.   :2.200   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.00   1st Qu.:2.850   1st Qu.:2.915  
##  Median :81.00   Median :87.00   Median :3.100   Median :3.250  
##  Mean   :78.56   Mean   :84.52   Mean   :3.092   Mean   :3.091  
##  3rd Qu.:92.00   3rd Qu.:93.50   3rd Qu.:3.400   3rd Qu.:3.415  
##  Max.   :99.00   Max.   :99.00   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs        frstlang         salary      
##  Min.   :1.000   Min.   : 0.00   Min.   :1.000   Min.   : 64000  
##  1st Qu.:1.000   1st Qu.: 2.00   1st Qu.:1.000   1st Qu.: 95000  
##  Median :2.000   Median : 3.00   Median :1.000   Median :100000  
##  Mean   :2.262   Mean   : 3.68   Mean   :1.068   Mean   :103031  
##  3rd Qu.:3.000   3rd Qu.: 4.00   3rd Qu.:1.000   3rd Qu.:106000  
##  Max.   :4.000   Max.   :16.00   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :3.000  
##  1st Qu.:5.000  
##  Median :6.000  
##  Mean   :5.883  
##  3rd Qu.:6.000  
##  Max.   :7.000
# Show the structure of the filtered data: dimensions, column types, first values.
str(mbaproper)
## 'data.frame':    103 obs. of  13 variables:
##  $ age     : int  22 27 25 25 27 28 24 25 25 25 ...
##  $ sex     : int  2 2 2 2 1 2 1 2 2 1 ...
##  $ gmat_tot: int  660 700 680 650 710 620 670 560 530 650 ...
##  $ gmat_qpc: int  90 94 87 82 96 52 84 52 50 79 ...
##  $ gmat_vpc: int  92 98 96 91 96 98 96 81 62 93 ...
##  $ gmat_tpc: int  94 98 96 93 98 87 95 72 61 93 ...
##  $ s_avg   : num  3.5 3.3 3.5 3.4 3.3 3.4 3.3 3.3 3.6 3.3 ...
##  $ f_avg   : num  3.75 3.25 2.67 3.25 3.5 3.75 3.25 3.5 3.67 3.5 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  1 2 2 3 2 5 0 1 3 1 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ salary  : int  85000 85000 86000 88000 92000 93000 95000 95000 95000 96000 ...
##  $ satis   : int  5 6 5 7 6 5 4 5 3 7 ...
# Extended descriptive statistics (describe() from the psych package) for the
# filtered data.
describe(mbaproper)
##          vars   n      mean       sd   median   trimmed     mad     min
## age         1 103     26.78     3.27 2.60e+01     26.30    2.97    22.0
## sex         2 103      1.30     0.46 1.00e+00      1.25    0.00     1.0
## gmat_tot    3 103    616.02    50.69 6.20e+02    615.90   59.30   500.0
## gmat_qpc    4 103     79.73    13.39 8.20e+01     81.05   13.34    39.0
## gmat_vpc    5 103     78.56    16.14 8.10e+01     80.33   16.31    30.0
## gmat_tpc    6 103     84.52    11.01 8.70e+01     85.60   11.86    51.0
## s_avg       7 103      3.09     0.38 3.10e+00      3.10    0.44     2.2
## f_avg       8 103      3.09     0.49 3.25e+00      3.13    0.37     0.0
## quarter     9 103      2.26     1.12 2.00e+00      2.20    1.48     1.0
## work_yrs   10 103      3.68     3.01 3.00e+00      3.11    1.48     0.0
## frstlang   11 103      1.07     0.25 1.00e+00      1.00    0.00     1.0
## salary     12 103 103030.74 17868.80 1.00e+05 101065.06 7413.00 64000.0
## satis      13 103      5.88     0.78 6.00e+00      5.89    1.48     3.0
##             max    range  skew kurtosis      se
## age          40     18.0  1.92     4.90    0.32
## sex           2      1.0  0.86    -1.28    0.05
## gmat_tot    720    220.0  0.01    -0.69    4.99
## gmat_qpc     99     60.0 -0.81     0.17    1.32
## gmat_vpc     99     69.0 -0.87     0.21    1.59
## gmat_tpc     99     48.0 -0.84     0.19    1.08
## s_avg         4      1.8 -0.13    -0.61    0.04
## f_avg         4      4.0 -2.52    13.86    0.05
## quarter       4      3.0  0.27    -1.34    0.11
## work_yrs     16     16.0  2.48     6.83    0.30
## frstlang      2      1.0  3.38     9.54    0.02
## salary   220000 156000.0  3.18    17.16 1760.67
## satis         7      4.0 -0.40     0.44    0.08

Visualizing some important variables independently

For age

# Distribution of age: a horizontal box plot followed by a histogram.
boxplot(
  mbaproper[["age"]],
  main = "Age",
  xlab = "years",
  col = "yellow",
  horizontal = TRUE
)

hist(
  mbaproper[["age"]],
  main = "Age",
  xlab = "years",
  col = "yellow"
)

For gmat total

# Distribution of the GMAT total score: horizontal box plot, then histogram.
boxplot(
  mbaproper[["gmat_tot"]],
  main = "Gmat total",
  xlab = "total score",
  col = "yellow",
  horizontal = TRUE
)

hist(
  mbaproper[["gmat_tot"]],
  main = "Gmat total score",
  xlab = "Gmat total score",
  col = "yellow"
)

For gmat quantitative percentile

# Distribution of the GMAT quantitative percentile: box plot, then histogram.
boxplot(
  mbaproper[["gmat_qpc"]],
  main = "Gmat quantitative percentile",
  xlab = "Gmat quantitative percentile",
  col = "yellow",
  horizontal = TRUE
)

hist(
  mbaproper[["gmat_qpc"]],
  main = "Gmat quantitative percentile",
  xlab = "Gmat quantitative percentile",
  col = "yellow"
)

For gmat verbal percentile

# Distribution of the GMAT verbal percentile: box plot, then histogram.
boxplot(
  mbaproper[["gmat_vpc"]],
  main = "Gmat verbal percentile",
  xlab = "Gmat verbal percentile",
  col = "yellow",
  horizontal = TRUE
)

hist(
  mbaproper[["gmat_vpc"]],
  main = "Gmat verbal percentile",
  xlab = "Gmat verbal percentile",
  col = "yellow"
)

For gmat total percentile

# Distribution of the GMAT total percentile: box plot, then histogram.
boxplot(
  mbaproper[["gmat_tpc"]],
  main = "Gmat total percentile",
  xlab = "Gmat total percentile",
  col = "yellow",
  horizontal = TRUE
)

hist(
  mbaproper[["gmat_tpc"]],
  main = "Gmat total percentile",
  xlab = "Gmat total percentile",
  col = "yellow"
)

For spring MBA average

# Distribution of the spring MBA average: box plot, then histogram.
boxplot(
  mbaproper[["s_avg"]],
  main = "Spring MBA average",
  xlab = "spring MBA average",
  col = "yellow",
  horizontal = TRUE
)

hist(
  mbaproper[["s_avg"]],
  main = "Spring MBA average",
  xlab = "spring MBA average",
  col = "yellow"
)

For fall MBA average

# Distribution of the fall MBA average: box plot, then histogram.
boxplot(
  mbaproper[["f_avg"]],
  main = "Fall MBA average",
  xlab = "fall MBA average",
  col = "yellow",
  horizontal = TRUE
)

hist(
  mbaproper[["f_avg"]],
  main = "Fall MBA average",
  xlab = "fall MBA average",
  col = "yellow"
)

Let us visualize the questions from the case study.

Visualizing the variables correlated pairwise.

For the changes in salary with gender.

# Horizontal box plot of salary for each gender code; with horizontal = TRUE
# the salary scale runs along the x-axis and the groups along the y-axis.
boxplot(
  salary ~ sex,
  data = mbaproper,
  horizontal = TRUE,
  col = "yellow",
  main = "Changes in salary with gender",
  xlab = "salary",
  ylab = "Gender"
)

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatterplot of salary against gender (scatterplot() comes from car).
# In a `y ~ x` formula the response is drawn on the vertical axis, so ylab
# names salary and xlab names the predictor.
scatterplot(
  salary ~ sex,
  data = mbaproper,
  xlab = "Gender",
  ylab = "salary",
  main = "Changes in salary with gender",
  spread = FALSE
)

Salary doesn’t change much with gender.

For the changes in salary with age.

# Horizontal box plot of salary for each age; with horizontal = TRUE the
# salary scale runs along the x-axis and the age groups along the y-axis.
boxplot(
  salary ~ age,
  data = mbaproper,
  horizontal = TRUE,
  col = "yellow",
  main = "Changes in salary with age",
  xlab = "salary",
  ylab = "Age"
)

# Scatterplot of salary against age. The response of `y ~ x` is drawn on the
# vertical axis, so ylab names salary and xlab names the predictor.
scatterplot(
  salary ~ age,
  data = mbaproper,
  xlab = "Age",
  ylab = "salary",
  main = "Changes in salary with age",
  spread = FALSE
)

Salary and age are correlated.

For the changes in salary with work experience.

# Horizontal box plot of salary for each number of years of work experience;
# salary runs along the x-axis, the experience groups along the y-axis.
boxplot(
  salary ~ work_yrs,
  data = mbaproper,
  horizontal = TRUE,
  col = "yellow",
  main = "Changes in salary with work experience",
  xlab = "salary",
  ylab = "Work experience"
)

# Scatterplot of salary against work experience. The response of `y ~ x` is
# drawn on the vertical axis, so ylab names salary and xlab the predictor.
scatterplot(
  salary ~ work_yrs,
  data = mbaproper,
  xlab = "Work experience",
  ylab = "salary",
  main = "Changes in salary with work experience",
  spread = FALSE
)

There’s a good correlation between salary and work experience.

For the changes in salary with first language.

# Horizontal box plot of salary for each first-language code; salary runs
# along the x-axis, the language groups along the y-axis.
boxplot(
  salary ~ frstlang,
  data = mbaproper,
  horizontal = TRUE,
  col = "yellow",
  main = "Changes in salary with first language ",
  xlab = "salary",
  ylab = "First language"
)

# Scatterplot of salary against first language. The response of `y ~ x` is
# drawn on the vertical axis, so ylab names salary and xlab the predictor.
scatterplot(
  salary ~ frstlang,
  data = mbaproper,
  xlab = "First language",
  ylab = "salary",
  main = "Changes in salary with first language",
  spread = FALSE
)

There’s no significant correlation between salary and first language.

Relation between satisfaction and salary.

# Horizontal box plot of salary for each satisfaction score; salary runs
# along the x-axis, the satisfaction groups along the y-axis.
boxplot(
  salary ~ satis,
  data = mbaproper,
  horizontal = TRUE,
  col = "yellow",
  main = "Relation of satisfaction with salary ",
  xlab = "salary",
  ylab = "Satisfaction"
)

# Scatterplot of salary against satisfaction. The response of `y ~ x` is
# drawn on the vertical axis, so ylab names salary and xlab the predictor.
scatterplot(
  salary ~ satis,
  data = mbaproper,
  xlab = "Satisfaction",
  ylab = "salary",
  main = "Relation of satisfaction with salary",
  spread = FALSE
)

There’s no significant relation between satisfaction and salary.

Scatterplot matrix

library(car)
# Pairwise scatterplot matrix of age, the GMAT scores, the two term averages,
# work experience and salary.
scatterplotMatrix(
  ~ age + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc + s_avg + f_avg +
    work_yrs + salary,
  data = mbaproper
)

Corrgram plot

library(corrgram)
# Correlogram of all columns in their original order (order = FALSE):
# panel.shade below the diagonal, panel.pie above it.
corrgram(
  mbaproper,
  main = "Corrgram of variables in MBA starting salaries ",
  text.panel = panel.txt,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  order = FALSE
)

Correlations

# Correlation matrix of every pair of columns (Pearson, the cor() default).
cor(mbaproper)
##                  age         sex    gmat_tot     gmat_qpc    gmat_vpc
## age       1.00000000 -0.14352927 -0.07871678 -0.165039057  0.01799420
## sex      -0.14352927  1.00000000 -0.01955548 -0.147099027  0.05341428
## gmat_tot -0.07871678 -0.01955548  1.00000000  0.666382266  0.78038546
## gmat_qpc -0.16503906 -0.14709903  0.66638227  1.000000000  0.09466541
## gmat_vpc  0.01799420  0.05341428  0.78038546  0.094665411  1.00000000
## gmat_tpc -0.09609156 -0.04686981  0.96680810  0.658650025  0.78443167
## s_avg     0.15654954  0.08079985  0.17198874  0.015471662  0.15865101
## f_avg    -0.21699191  0.16572186  0.12246257  0.098418869  0.02290167
## quarter  -0.12568145 -0.02139041 -0.10578964  0.012648346 -0.12862079
## work_yrs  0.88052470 -0.09233003 -0.12280018 -0.182701263 -0.02812182
## frstlang  0.35026743  0.07512009 -0.13164323  0.014198516 -0.21835333
## salary    0.49964284 -0.16628869 -0.09067141  0.014141299 -0.13743230
## satis     0.10832308 -0.09199534  0.06474206 -0.003984632  0.14863481
##             gmat_tpc       s_avg       f_avg     quarter    work_yrs
## age      -0.09609156  0.15654954 -0.21699191 -0.12568145  0.88052470
## sex      -0.04686981  0.08079985  0.16572186 -0.02139041 -0.09233003
## gmat_tot  0.96680810  0.17198874  0.12246257 -0.10578964 -0.12280018
## gmat_qpc  0.65865003  0.01547166  0.09841887  0.01264835 -0.18270126
## gmat_vpc  0.78443167  0.15865101  0.02290167 -0.12862079 -0.02812182
## gmat_tpc  1.00000000  0.13938500  0.07051391 -0.09955033 -0.13246963
## s_avg     0.13938500  1.00000000  0.44590413 -0.84038355  0.16328236
## f_avg     0.07051391  0.44590413  1.00000000 -0.43144819 -0.21633018
## quarter  -0.09955033 -0.84038355 -0.43144819  1.00000000 -0.12896722
## work_yrs -0.13246963  0.16328236 -0.21633018 -0.12896722  1.00000000
## frstlang -0.16437561 -0.13788905 -0.05061394  0.10955726  0.19627277
## salary   -0.13201783  0.10173175 -0.10603897 -0.12848526  0.45466634
## satis     0.11630842 -0.14356557 -0.11773304  0.22511985  0.06299926
##             frstlang      salary        satis
## age       0.35026743  0.49964284  0.108323083
## sex       0.07512009 -0.16628869 -0.091995338
## gmat_tot -0.13164323 -0.09067141  0.064742057
## gmat_qpc  0.01419852  0.01414130 -0.003984632
## gmat_vpc -0.21835333 -0.13743230  0.148634805
## gmat_tpc -0.16437561 -0.13201783  0.116308417
## s_avg    -0.13788905  0.10173175 -0.143565573
## f_avg    -0.05061394 -0.10603897 -0.117733043
## quarter   0.10955726 -0.12848526  0.225119851
## work_yrs  0.19627277  0.45466634  0.062999256
## frstlang  1.00000000  0.26701953  0.089834769
## salary    0.26701953  1.00000000 -0.040050600
## satis     0.08983477 -0.04005060  1.000000000

Covariance matrix

# Covariance matrix of every pair of columns; unlike the correlations, these
# values are on the original scale of the variables.
cov(mbaproper)
##                    age           sex      gmat_tot      gmat_qpc
## age         10.7045498 -2.164477e-01 -1.305445e+01   -7.22796497
## sex         -0.2164477  2.124500e-01 -4.568818e-01   -0.90757662
## gmat_tot   -13.0544451 -4.568818e-01  2.569294e+03  452.14258519
## gmat_qpc    -7.2279650 -9.075766e-01  4.521426e+02  179.18027794
## gmat_vpc     0.9505045  3.974872e-01  6.386360e+02   20.45849990
## gmat_tpc    -3.4602132 -2.377689e-01  5.393623e+02   97.03607462
## s_avg        0.1938587  1.409575e-02  3.299562e+00    0.07838473
## f_avg       -0.3462517  3.725395e-02  3.027432e+00    0.64252142
## quarter     -0.4604988 -1.104131e-02 -6.005140e+00    0.18960594
## work_yrs     8.6728536 -1.281173e-01 -1.873882e+01   -7.36245955
## frstlang     0.2898344  8.756901e-03 -1.687607e+00    0.04806777
## salary   29210.5193223 -1.369577e+03 -8.212449e+04 3382.43784504
## satis        0.2776509 -3.321911e-02  2.570912e+00   -0.04178565
##               gmat_vpc      gmat_tpc        s_avg         f_avg
## age       9.505045e-01 -3.460213e+00   0.19385875 -3.462517e-01
## sex       3.974872e-01 -2.377689e-01   0.01409575  3.725395e-02
## gmat_tot  6.386360e+02  5.393623e+02   3.29956215  3.027432e+00
## gmat_qpc  2.045850e+01  9.703607e+01   0.07838473  6.425214e-01
## gmat_vpc  2.606602e+02  1.393882e+02   0.96945936  1.803303e-01
## gmat_tpc  1.393882e+02  1.211342e+02   0.58062916  3.785056e-01
## s_avg     9.694594e-01  5.806292e-01   0.14325138  8.231046e-02
## f_avg     1.803303e-01  3.785056e-01   0.08231046  2.378638e-01
## quarter  -2.325528e+00 -1.227013e+00  -0.35620503 -2.356492e-01
## work_yrs -1.366838e+00 -4.389206e+00   0.18604797 -3.176271e-01
## frstlang -8.915858e-01 -4.575481e-01  -0.01319912 -6.243099e-03
## salary   -3.964803e+04 -2.596339e+04 688.02042071 -9.241129e+02
## satis     1.879973e+00  1.002856e+00  -0.04256901 -4.498382e-02
##                quarter      work_yrs      frstlang        salary
## age      -4.604988e-01     8.6728536  2.898344e-01  2.921052e+04
## sex      -1.104131e-02    -0.1281173  8.756901e-03 -1.369577e+03
## gmat_tot -6.005140e+00   -18.7388159 -1.687607e+00 -8.212449e+04
## gmat_qpc  1.896059e-01    -7.3624595  4.806777e-02  3.382438e+03
## gmat_vpc -2.325528e+00    -1.3668380 -8.915858e-01 -3.964803e+04
## gmat_tpc -1.227013e+00    -4.3892062 -4.575481e-01 -2.596339e+04
## s_avg    -3.562050e-01     0.1860480 -1.319912e-02  6.880204e+02
## f_avg    -2.356492e-01    -0.3176271 -6.243099e-03 -9.241129e+02
## quarter   1.254140e+00    -0.4347992  3.102989e-02 -2.571117e+03
## work_yrs -4.347992e-01     9.0630116  1.494384e-01  2.445820e+04
## frstlang  3.102989e-02     0.1494384  6.396345e-02  1.206714e+03
## salary   -2.571117e+03 24458.1995050  1.206714e+03  3.192940e+08
## satis     1.975062e-01     0.1485818  1.779935e-02 -5.606583e+02
##                  satis
## age         0.27765087
## sex        -0.03321911
## gmat_tot    2.57091186
## gmat_qpc   -0.04178565
## gmat_vpc    1.87997335
## gmat_tpc    1.00285551
## s_avg      -0.04256901
## f_avg      -0.04498382
## quarter     0.19750619
## work_yrs    0.14858176
## frstlang    0.01779935
## salary   -560.65829050
## satis       0.61374453

t tests to check salary dependence on factors

# Welch two-sample t-test: is mean salary the same in the two sex groups?
t.test(mbaproper$salary~mbaproper$sex)
## 
##  Welch Two Sample t-test
## 
## data:  mbaproper$salary by mbaproper$sex
## t = 1.3628, df = 38.115, p-value = 0.1809
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3128.55 16021.72
## sample estimates:
## mean in group 1 mean in group 2 
##       104970.97        98524.39
# Welch two-sample t-test: is mean salary the same in the two first-language
# groups?
t.test(mbaproper$salary~mbaproper$frstlang)
## 
##  Welch Two Sample t-test
## 
## data:  mbaproper$salary by mbaproper$frstlang
## t = -1.1202, df = 6.0863, p-value = 0.3049
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -59933.62  22202.25
## sample estimates:
## mean in group 1 mean in group 2 
##        101748.6        120614.3

Since p > 0.05 in both cases, we fail to reject the null hypothesis of equal mean salaries: the data give no significant evidence that salary differs by gender or by first language.

# Model 1: linear regression of salary on age, the four GMAT measures, both
# term averages, quarter, work experience and satisfaction.
fit1<-lm(formula=salary~age+gmat_tot+gmat_qpc+gmat_tpc+gmat_vpc+s_avg+f_avg+quarter+work_yrs+satis,data=mbaproper)
summary(fit1)
## 
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + 
##     gmat_vpc + s_avg + f_avg + quarter + work_yrs + satis, data = mbaproper)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26196  -8241   -324   5297  70000 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 69019.43   52376.30   1.318   0.1909  
## age          2379.27    1004.19   2.369   0.0199 *
## gmat_tot       29.52     176.18   0.168   0.8673  
## gmat_qpc      813.29     492.44   1.652   0.1020  
## gmat_tpc    -1479.96     713.20  -2.075   0.0408 *
## gmat_vpc      489.93     495.74   0.988   0.3256  
## s_avg       -3124.32    8046.45  -0.388   0.6987  
## f_avg       -2345.08    3855.93  -0.608   0.5446  
## quarter     -2787.20    2694.67  -1.034   0.3037  
## work_yrs      360.74    1087.30   0.332   0.7408  
## satis        -719.58    2136.17  -0.337   0.7370  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15410 on 92 degrees of freedom
## Multiple R-squared:  0.3296, Adjusted R-squared:  0.2567 
## F-statistic: 4.523 on 10 and 92 DF,  p-value: 3.341e-05
# Model 2: same predictors as model 1 with `quarter` removed.
fit2<-lm(formula=salary~age+gmat_tot+gmat_qpc+gmat_tpc+gmat_vpc+s_avg+f_avg+work_yrs+satis,data=mbaproper)
summary(fit2)
## 
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + 
##     gmat_vpc + s_avg + f_avg + work_yrs + satis, data = mbaproper)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29429  -7405    358   5528  69521 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 54916.31   50589.39   1.086   0.2805  
## age          2437.73    1002.98   2.430   0.0170 *
## gmat_tot      -21.21     169.27  -0.125   0.9005  
## gmat_qpc      891.50     486.78   1.831   0.0702 .
## gmat_tpc    -1419.88     711.10  -1.997   0.0488 *
## gmat_vpc      579.60     488.29   1.187   0.2382  
## s_avg        3460.17    4923.28   0.703   0.4839  
## f_avg       -1642.93    3797.13  -0.433   0.6663  
## work_yrs      338.47    1087.50   0.311   0.7563  
## satis       -1284.40    2065.97  -0.622   0.5357  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15410 on 93 degrees of freedom
## Multiple R-squared:  0.3218, Adjusted R-squared:  0.2562 
## F-statistic: 4.903 on 9 and 93 DF,  p-value: 2.219e-05
# Model 3: model 2 with `satis` removed as well.
fit3<-lm(formula=salary~age+gmat_tot+gmat_qpc+gmat_tpc+gmat_vpc+s_avg+f_avg+work_yrs,data=mbaproper)
summary(fit3)
## 
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + 
##     gmat_vpc + s_avg + f_avg + work_yrs, data = mbaproper)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31270  -7556    665   5143  69407 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 43964.6689 47268.2859   0.930   0.3547  
## age          2363.9733   992.6810   2.381   0.0193 *
## gmat_tot       -0.7838   165.5112  -0.005   0.9962  
## gmat_qpc      868.1298   483.7421   1.795   0.0759 .
## gmat_tpc    -1467.2112   704.7030  -2.082   0.0401 *
## gmat_vpc      546.2923   483.7512   1.129   0.2617  
## s_avg        3895.8927  4857.2091   0.802   0.4245  
## f_avg       -1709.7057  3783.2060  -0.452   0.6524  
## work_yrs      372.0196  1082.6051   0.344   0.7319  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15360 on 94 degrees of freedom
## Multiple R-squared:  0.319,  Adjusted R-squared:  0.261 
## F-statistic: 5.503 on 8 and 94 DF,  p-value: 1.05e-05
# Model 4: model 3 without the two term averages (`s_avg`, `f_avg`).
fit4<-lm(formula=salary~age+gmat_tot+gmat_qpc+gmat_tpc+gmat_vpc+work_yrs,data=mbaproper)
summary(fit4)
## 
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + 
##     gmat_vpc + work_yrs, data = mbaproper)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29602  -7617    329   5510  66763 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 44161.95   46928.83   0.941   0.3490  
## age          2412.78     981.86   2.457   0.0158 *
## gmat_tot       12.71     157.04   0.081   0.9357  
## gmat_qpc      810.35     468.98   1.728   0.0872 .
## gmat_tpc    -1411.33     695.03  -2.031   0.0451 *
## gmat_vpc      501.51     470.60   1.066   0.2892  
## work_yrs      466.48    1067.93   0.437   0.6632  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15250 on 96 degrees of freedom
## Multiple R-squared:  0.3143, Adjusted R-squared:  0.2714 
## F-statistic: 7.333 on 6 and 96 DF,  p-value: 1.806e-06
# Model 5: smallest model, keeping only age, GMAT total and work experience.
fit5<-lm(formula=salary~age+gmat_tot+work_yrs,data=mbaproper)
summary(fit5)
## 
## Call:
## lm(formula = salary ~ age + gmat_tot + work_yrs, data = mbaproper)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -32657  -8150  -2117   4705  78974 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 46876.35   29418.14   1.593   0.1142  
## age          2448.62    1002.87   2.442   0.0164 *
## gmat_tot      -17.19      30.92  -0.556   0.5795  
## work_yrs      319.93    1094.82   0.292   0.7707  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15680 on 99 degrees of freedom
## Multiple R-squared:  0.2529, Adjusted R-squared:  0.2303 
## F-statistic: 11.17 on 3 and 99 DF,  p-value: 2.228e-06

So the first model has the highest multiple R-squared (0.3296) among these models, although the fourth model has the highest adjusted R-squared (0.2714).

Comparison between placed and unplaced students.

# Data for the placed/unplaced comparison: drop the rows whose salary is 998
# or 999 but keep the zero salaries.
mba <- mbasal[which(!(mbasal$salary == 998 | mbasal$salary == 999)), ]
# placed is 1 for a non-zero salary and 0 for a zero salary.
mba$placed <- as.numeric(mba$salary != 0)
# Cross-tabulate sex against placement and print the table.
model1 <- xtabs(~ sex + placed, data = mba)
model1
##    placed
## sex  0  1
##   1 67 72
##   2 23 31
# Pearson chi-squared test of independence between sex and placement; on this
# 2x2 table R applies Yates' continuity correction.
chisq.test(model1)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  model1
## X-squared = 0.29208, df = 1, p-value = 0.5889

As p > 0.05, we fail to reject the null hypothesis, so there is no significant evidence that placement depends on gender.

# Cross-tabulate age against placement; the outer parentheses print the table.
(model2 <- xtabs(~ age + placed, data = mba))
##     placed
## age   0  1
##   22  1  1
##   23  3  5
##   24 13 16
##   25  9 23
##   26 10 14
##   27 14 14
##   28  6  8
##   29 11  6
##   30  2  6
##   31  2  4
##   32  5  1
##   33  0  1
##   34  3  1
##   35  3  0
##   36  2  0
##   37  1  0
##   39  1  1
##   40  0  2
##   42  1  0
##   43  2  0
##   48  1  0
# Pearson chi-squared test of independence between age and placement.
# NOTE: the table has many cells with very small counts, and R warns that the
# chi-squared approximation may be incorrect, so treat the p-value with care.
chisq.test(model2)
## Warning in chisq.test(model2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  model2
## X-squared = 27.943, df = 20, p-value = 0.1108

As p > 0.05, we fail to reject the null hypothesis, so there is no significant evidence that placement depends on age.

# Cross-tabulate the spring average against placement; the outer parentheses
# print the table.
(model3 <- xtabs(~ s_avg + placed, data = mba))
##       placed
## s_avg   0  1
##   2     1  0
##   2.1   2  0
##   2.2   1  1
##   2.3   2  1
##   2.4   2  4
##   2.5   0  3
##   2.6   1  6
##   2.7   8  4
##   2.8   9  7
##   2.82  1  0
##   2.9   9 13
##   2.91  0  1
##   3    10  6
##   3.08  1  0
##   3.09  2  2
##   3.1   6  7
##   3.17  1  0
##   3.2   4  9
##   3.25  1  0
##   3.27  2  1
##   3.3   9 11
##   3.38  1  0
##   3.4   7  5
##   3.45  1  1
##   3.5   2 10
##   3.6   4  6
##   3.64  1  0
##   3.7   0  2
##   3.8   1  2
##   3.9   1  0
##   4     0  1
# Pearson chi-squared test of independence between spring average and
# placement.
# NOTE: the table has many cells with very small counts, and R warns that the
# chi-squared approximation may be incorrect, so treat the p-value with care.
chisq.test(model3)
## Warning in chisq.test(model3): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  model3
## X-squared = 33.09, df = 30, p-value = 0.3187

As p > 0.05, we fail to reject the null hypothesis, so there is no significant evidence that placement depends on spring average.

# Cross-tabulate the fall average against placement; the outer parentheses
# print the table.
(model4 <- xtabs(~ f_avg + placed, data = mba))
##       placed
## f_avg   0  1
##   0     1  1
##   2     3  2
##   2.25  2  1
##   2.5   8  5
##   2.67  1  1
##   2.75  9 15
##   2.83  0  1
##   3    24 25
##   3.17  1  0
##   3.2   1  0
##   3.25 18 25
##   3.33  1  1
##   3.4   1  0
##   3.5   7 17
##   3.6   1  2
##   3.67  1  2
##   3.75  6  3
##   3.83  1  0
##   4     4  2
# Pearson chi-squared test of independence between fall average and placement.
# NOTE: the table has many cells with very small counts, and R warns that the
# chi-squared approximation may be incorrect, so treat the p-value with care.
chisq.test(model4)
## Warning in chisq.test(model4): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  model4
## X-squared = 14.576, df = 18, p-value = 0.6908

As p > 0.05, we fail to reject the null hypothesis, so there is no significant evidence that placement depends on fall average. Hence, placement shows no significant association with any of the factors tested here.