MBA Starting Salaries

library(readr)
# Load the survey data from the CSV file (path is relative to the working
# directory); readr guesses each column's type and prints the specification.
MBA_Starting_Salaries_Data_ <- read_csv(
  file = "MBA Starting Salaries Data .csv"
)

## Parsed with column specification:
## cols(
##   age = col_integer(),
##   sex = col_integer(),
##   gmat_tot = col_integer(),
##   gmat_qpc = col_integer(),
##   gmat_vpc = col_integer(),
##   gmat_tpc = col_integer(),
##   s_avg = col_double(),
##   f_avg = col_double(),
##   quarter = col_integer(),
##   work_yrs = col_integer(),
##   frstlang = col_integer(),
##   salary = col_integer(),
##   satis = col_integer()
## )

# Open the loaded data in the spreadsheet-style viewer for a visual check.
# The viewer is only useful (and only reliably available) when someone is
# sitting at an interactive session, so skip it for batch runs and knitting.
if (interactive()) {
  View(MBA_Starting_Salaries_Data_)
}

Summarising the dataset

library(psych)
# Print per-column descriptive statistics (n, mean, sd, median, range, skew,
# kurtosis, standard error) for every variable in the data set.
psych::describe(MBA_Starting_Salaries_Data_)

##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

plots

# Histogram of total GMAT scores; `breaks = 6` is a suggested number of bins.
hist(
  MBA_Starting_Salaries_Data_$gmat_tot,
  breaks = 6,
  col = "green",
  main = "Gmat total of MBA Graduates"
)

Most of the MBA graduates have a GMAT total between 550 and 650.

# Bar chart of the number of graduates under each sex code. `sex` is a
# two-level categorical code, so its counts are tabulated and drawn as bars;
# hist() would bin the codes like a continuous measurement and leave a run of
# empty bins between the two values.
barplot(
  table(MBA_Starting_Salaries_Data_$sex),
  col = c("green", "red"),
  main = "MBA graduates by sex",
  xlab = "sex",
  ylab = "Frequency"
)

Most of the MBA graduates in the sample were male

# Bar chart of the number of graduates under each first-language code.
# `frstlang` is a two-level categorical code, so its counts are tabulated and
# drawn as bars; hist() would bin the codes like a continuous measurement and
# leave a run of empty bins between the two values.
barplot(
  table(MBA_Starting_Salaries_Data_$frstlang),
  col = c("pink", "blue"),
  main = "MBA graduates by first language",
  xlab = "frstlang",
  ylab = "Frequency"
)

Most of the MBA graduates have English as their first language.

# Histogram of years of work experience.
hist(
  MBA_Starting_Salaries_Data_$work_yrs,
  col = "orange"
)

# Horizontal box plot of the salary column for all respondents.
boxplot(
  MBA_Starting_Salaries_Data_$salary,
  main = "Salary ",
  xlab = "Salary",
  horizontal = TRUE,
  col = "purple"
)

# Count how many graduates gave each value of `satis`, then print the counts.
mytable <- table(satis = MBA_Starting_Salaries_Data_$satis)
print(mytable)

## satis
##   1   2   3   4   5   6   7 998 
##   1   1   5  17  74  97  33  46

Here, 33 graduates report very high satisfaction (a rating of 7) with their MBA program.

# Side-by-side horizontal box plots of the GMAT quantitative (gmat_qpc) and
# verbal (gmat_vpc) percentile columns.
boxplot(
  MBA_Starting_Salaries_Data_$gmat_qpc,
  MBA_Starting_Salaries_Data_$gmat_vpc,
  col = c("red", "pink"),
  horizontal = TRUE
)

# Side-by-side horizontal box plots of the s_avg and f_avg columns.
boxplot(
  MBA_Starting_Salaries_Data_$s_avg,
  MBA_Starting_Salaries_Data_$f_avg,
  horizontal = TRUE,
  col = c("green", "yellow")
)

#SALARY VS SEX

 # Mean salary within each sex group, over all respondents.
 aggregate(
   salary ~ sex,
   data = MBA_Starting_Salaries_Data_,
   FUN = mean
 )

##   sex   salary
## 1   1 37013.62
## 2   2 45121.07

The mean salary of female graduates is higher than that of male graduates.

# Horizontal box plots of salary, one box per sex group.
boxplot(
  salary ~ sex,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs sex",
  col = c("blue", "orange"),
  horizontal = TRUE
)

#salary vs age

library(car)

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

# Scatter plot (car package) of salary against age for all respondents.
scatterplot(
  salary ~ age,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs age",
  xlab = "age",
  ylab = "salary"
)

#SALARY VS GMAT SCORE

library(car)
# Scatter plot (car package) of salary against total GMAT score.
scatterplot(
  salary ~ gmat_tot,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs gmat score",
  xlab = "gmat total",
  ylab = "salary"
)

salary vs first language

 # Mean salary within each first-language group, over all respondents.
 aggregate(
   salary ~ frstlang,
   data = MBA_Starting_Salaries_Data_,
   FUN = mean
 )

##   frstlang   salary
## 1        1 40627.12
## 2        2 26914.84

The mean salary of graduates who have English as their first language is higher than that of graduates whose first language is not English.

# Horizontal box plots of salary, one box per first-language group.
boxplot(
  salary ~ frstlang,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs first language",
  col = c("blue", "orange"),
  horizontal = TRUE
)

#SALARY VS WORK EXPERIENCE

library(car)
# Scatter plot (car package) of salary against years of work experience.
scatterplot(
  salary ~ work_yrs,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs work experience",
  xlab = "work experience",
  ylab = "salary"
)

Correlogram

library(corrgram)
# Correlogram of every column against every other: shaded cells below the
# diagonal, pie glyphs above it, variable names on the diagonal, and the
# variables reordered by corrgram (order = TRUE).
corrgram(
  MBA_Starting_Salaries_Data_,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "corrogram"
)

# Keep only the rows with a positive salary and summarise that subset.
# The threshold is the number 0: comparing against the string '0' would make
# R coerce salary to character and order the values as text, which is
# locale-dependent string ordering rather than a numeric test.
# which() turns the logical mask into row indices, so a row whose comparison
# is NA is dropped rather than returned as an all-NA row.
# NOTE(review): the salary values 998 and 999 survive this filter; they look
# like non-response codes rather than real salaries -- confirm against the
# codebook and raise the threshold if they should be excluded.
newdata <- MBA_Starting_Salaries_Data_[
  which(MBA_Starting_Salaries_Data_$salary > 0),
]
describe(newdata)

##          vars   n     mean       sd  median  trimmed      mad   min    max
## age         1 184    26.79     2.77    26.0    26.45     1.48  22.0     40
## sex         2 184     1.24     0.43     1.0     1.18     0.00   1.0      2
## gmat_tot    3 184   621.96    54.76   620.0   621.69    59.30 450.0    790
## gmat_qpc    4 184    81.49    13.67    83.0    82.99    14.83  39.0     99
## gmat_vpc    5 184    78.66    17.24    83.0    80.84    17.79  16.0     99
## gmat_tpc    6 184    85.13    12.94    87.5    86.86    11.12   0.0     99
## s_avg       7 184     3.02     0.38     3.0     3.02     0.44   2.2      4
## f_avg       8 184     3.06     0.51     3.0     3.08     0.37   0.0      4
## quarter     9 184     2.45     1.13     2.0     2.43     1.48   1.0      4
## work_yrs   10 184     3.52     2.49     3.0     3.11     1.48   0.0     16
## frstlang   11 184     1.13     0.34     1.0     1.04     0.00   1.0      2
## salary     12 184 58114.34 52511.21 88250.0 56402.47 41883.45 998.0 220000
## satis      13 184   253.65   430.93     6.0   193.55     1.48   1.0    998
##             range  skew kurtosis      se
## age          18.0  1.91     5.94    0.20
## sex           1.0  1.18    -0.61    0.03
## gmat_tot    340.0 -0.07     0.30    4.04
## gmat_qpc     60.0 -0.86     0.16    1.01
## gmat_vpc     83.0 -1.09     0.82    1.27
## gmat_tpc     99.0 -2.36    10.16    0.95
## s_avg         1.8  0.12    -0.67    0.03
## f_avg         4.0 -2.21    12.15    0.04
## quarter       3.0  0.04    -1.40    0.08
## work_yrs     16.0  2.69     9.70    0.18
## frstlang      1.0  2.18     2.75    0.02
## salary   219002.0  0.02    -1.41 3871.18
## satis       997.0  1.15    -0.69   31.77

Who got paid more?

# Cross-tabulate each distinct salary value against sex in `newdata`, then
# print the contingency table.
mytable <- xtabs(~ salary + sex, newdata)
print(mytable)

##         sex
## salary    1  2
##   998    37  9
##   999    30  5
##   64000   0  1
##   77000   1  0
##   78256   0  1
##   82000   0  1
##   85000   1  3
##   86000   0  2
##   88000   0  1
##   88500   1  0
##   90000   3  0
##   92000   2  1
##   93000   2  1
##   95000   4  3
##   96000   3  1
##   96500   1  0
##   97000   2  0
##   98000   6  4
##   99000   0  1
##   100000  4  5
##   100400  1  0
##   101000  0  2
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  2  0
##   105000 11  0
##   106000  2  1
##   107000  1  0
##   107300  1  0
##   107500  1  0
##   108000  2  0
##   110000  0  1
##   112000  3  0
##   115000  5  0
##   118000  1  0
##   120000  3  1
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1

# Two-sample t-test of mean salary between the two sex groups in `newdata`
# (t.test's default does not assume equal variances, i.e. the Welch test).
t.test(
  salary ~ sex,
  data = newdata
)

## 
##  Welch Two Sample t-test
## 
## data:  salary by sex
## t = -1.5302, df = 78.552, p-value = 0.13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -30667.267   4010.795
## sample estimates:
## mean in group 1 mean in group 2 
##        54854.72        68182.96

# Pearson chi-squared test of independence on the salary-by-sex table.
# NOTE(review): most cells of that table hold 0 or 1 observations, so the
# chi-squared approximation is likely unreliable here -- interpret with care.
chisq.test(x = mytable)

## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  mytable
## X-squared = 64.319, df = 43, p-value = 0.0192

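The chi-squared test (p = 0.0192) indicates a significant association between salary and sex, and the group means show that female graduates have the higher mean salary. Note, however, that the Welch t-test above (p = 0.13) does not find the difference in mean salary significant at the 5% level, and R warns that the chi-squared approximation may be incorrect.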

# Cross-tabulate each distinct salary value against first language in
# `newdata`, then print the contingency table.
mytable1 <- xtabs(~ salary + frstlang, newdata)
print(mytable1)

##         frstlang
## salary    1  2
##   998    38  8
##   999    26  9
##   64000   1  0
##   77000   1  0
##   78256   1  0
##   82000   1  0
##   85000   4  0
##   86000   2  0
##   88000   1  0
##   88500   1  0
##   90000   3  0
##   92000   3  0
##   93000   3  0
##   95000   7  0
##   96000   4  0
##   96500   1  0
##   97000   2  0
##   98000   8  2
##   99000   0  1
##   100000  9  0
##   100400  1  0
##   101000  2  0
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  1  1
##   105000 11  0
##   106000  3  0
##   107000  1  0
##   107300  0  1
##   107500  1  0
##   108000  2  0
##   110000  1  0
##   112000  3  0
##   115000  5  0
##   118000  0  1
##   120000  4  0
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1

# Two-sample t-test of mean salary between the two first-language groups in
# `newdata` (t.test's default does not assume equal variances).
t.test(
  salary ~ frstlang,
  data = newdata
)

## 
##  Welch Two Sample t-test
## 
## data:  salary by frstlang
## t = 1.984, df = 28.139, p-value = 0.05708
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   -823.4068 51947.5402
## sample estimates:
## mean in group 1 mean in group 2 
##        61448.53        35886.46

# Pearson chi-squared test of independence on the salary-by-first-language
# table.
chisq.test(x = mytable1)

## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  mytable1
## X-squared = 48.273, df = 43, p-value = 0.2682

We fail to reject the null hypothesis: there is no significant difference in salary by first language (chi-squared p = 0.2682; t-test p = 0.05708).

# Cross-tabulate each distinct salary value against total GMAT score in
# `newdata`, then print the contingency table.
mytable2 <- xtabs(~ salary + gmat_tot, newdata)
print(mytable2)

##         gmat_tot
## salary   450 460 500 520 530 540 550 560 570 580 590 600 610 620 630 640
##   998      1   1   0   0   0   0   0   3   0   2   2   5   4   0   7   3
##   999      0   0   1   0   0   0   1   2   4   1   2   3   0   4   2   2
##   64000    0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
##   77000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   78256    0   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   85000    0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   86000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   88000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   88500    0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   90000    0   0   0   0   0   0   0   0   0   1   0   0   0   0   1   0
##   92000    0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   93000    0   0   0   0   0   1   0   0   0   0   0   0   1   1   0   0
##   95000    0   0   0   0   1   0   0   2   0   0   0   0   2   0   0   0
##   96000    0   0   0   0   0   0   0   1   0   0   1   1   0   0   0   0
##   96500    0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   0   0   1   0   0   0   1   0   0
##   98000    0   0   0   0   0   0   0   1   3   1   1   0   1   0   0   0
##   99000    0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   100000   0   0   0   0   0   0   0   2   0   1   0   1   1   0   1   0
##   100400   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   101000   0   0   0   0   0   0   0   0   0   0   0   1   0   1   0   0
##   101100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   101600   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   102500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   104000   0   0   0   0   1   0   0   1   0   0   0   0   0   0   0   0
##   105000   0   0   0   0   0   0   2   0   2   3   0   1   0   1   0   0
##   106000   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   107000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   107300   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   107500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   108000   0   0   0   0   0   0   0   0   1   0   0   1   0   0   0   0
##   110000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   112000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   115000   0   0   0   0   0   1   0   0   1   0   0   0   0   1   1   0
##   118000   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   120000   0   0   0   0   0   0   0   0   0   0   0   2   0   0   0   0
##   126710   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0
##   130000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   145800   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   146000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   162000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   220000   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0
##         gmat_tot
## salary   650 660 670 680 690 700 710 720 730 740 790
##   998      2   3   5   2   2   0   1   1   0   2   0
##   999      2   3   1   1   2   0   1   0   1   1   1
##   64000    0   0   0   0   0   0   0   0   0   0   0
##   77000    0   1   0   0   0   0   0   0   0   0   0
##   78256    0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   1   0   0   0   0   0   0   0   0
##   85000    0   1   0   0   0   1   0   1   0   0   0
##   86000    0   0   0   1   0   0   0   0   0   0   0
##   88000    1   0   0   0   0   0   0   0   0   0   0
##   88500    0   0   0   0   0   0   0   0   0   0   0
##   90000    1   0   0   0   0   0   0   0   0   0   0
##   92000    0   1   0   0   0   0   1   0   0   0   0
##   93000    0   0   0   0   0   0   0   0   0   0   0
##   95000    0   0   2   0   0   0   0   0   0   0   0
##   96000    1   0   0   0   0   0   0   0   0   0   0
##   96500    0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   0   0   0   0
##   98000    0   0   1   1   0   0   1   0   0   0   0
##   99000    0   0   0   0   0   0   0   0   0   0   0
##   100000   2   0   0   0   0   0   1   0   0   0   0
##   100400   0   0   0   0   0   0   0   0   0   0   0
##   101000   0   0   0   0   0   0   0   0   0   0   0
##   101100   0   1   0   0   0   0   0   0   0   0   0
##   101600   0   0   0   0   0   0   0   0   0   0   0
##   102500   0   0   1   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0
##   104000   0   0   0   0   0   0   0   0   0   0   0
##   105000   1   0   0   1   0   0   0   0   0   0   0
##   106000   0   0   0   2   0   0   0   0   0   0   0
##   107000   0   0   0   0   0   0   0   0   0   0   0
##   107300   0   1   0   0   0   0   0   0   0   0   0
##   107500   0   0   0   0   0   0   0   0   0   0   0
##   108000   0   0   0   0   0   0   0   0   0   0   0
##   110000   0   0   0   0   0   0   0   0   0   0   0
##   112000   0   0   1   1   0   0   0   0   0   0   0
##   115000   0   0   0   0   0   0   1   0   0   0   0
##   118000   0   0   0   0   0   0   0   0   0   0   0
##   120000   0   0   1   0   0   1   0   0   0   0   0
##   126710   0   0   0   0   0   0   0   0   0   0   0
##   130000   1   0   0   0   0   0   0   0   0   0   0
##   145800   0   0   0   0   0   0   0   0   0   0   0
##   146000   0   0   0   0   0   0   0   0   0   0   0
##   162000   0   0   0   0   0   1   0   0   0   0   0
##   220000   0   0   0   0   0   0   0   0   0   0   0

# Pearson chi-squared test of independence on the salary-by-GMAT-total table.
chisq.test(x = mytable2)

## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  mytable2
## X-squared = 1121.5, df = 1118, p-value = 0.465

# Cross-tabulate each distinct salary value against years of work experience
# in `newdata`, then print the contingency table.
mytable3 <- xtabs(~ salary + work_yrs, newdata)
print(mytable3)

##         work_yrs
## salary    0  1  2  3  4  5  6  7  8  9 10 15 16
##   998     0  2 16 13 12  0  1  1  1  0  0  0  0
##   999     1  2  6  8 11  2  2  2  0  1  0  0  0
##   64000   0  0  1  0  0  0  0  0  0  0  0  0  0
##   77000   0  0  1  0  0  0  0  0  0  0  0  0  0
##   78256   0  1  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  1  0  0  0  0  0  0  0  0  0  0  0
##   85000   0  1  2  1  0  0  0  0  0  0  0  0  0
##   86000   0  0  1  1  0  0  0  0  0  0  0  0  0
##   88000   0  0  0  1  0  0  0  0  0  0  0  0  0
##   88500   0  0  0  1  0  0  0  0  0  0  0  0  0
##   90000   0  0  2  0  0  1  0  0  0  0  0  0  0
##   92000   0  0  3  0  0  0  0  0  0  0  0  0  0
##   93000   0  0  0  0  1  1  0  0  1  0  0  0  0
##   95000   1  1  2  2  0  1  0  0  0  0  0  0  0
##   96000   0  1  2  0  1  0  0  0  0  0  0  0  0
##   96500   0  0  1  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  1  1  0  0  0  0  0  0  0  0
##   98000   0  0  7  1  1  0  0  1  0  0  0  0  0
##   99000   0  0  0  0  0  1  0  0  0  0  0  0  0
##   100000  0  0  6  1  1  0  1  0  0  0  0  0  0
##   100400  0  0  0  1  0  0  0  0  0  0  0  0  0
##   101000  0  0  2  0  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  0  0  0  0  1  0  0  0  0
##   101600  0  0  0  1  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  1  0  0  0  0  0  0
##   103000  0  0  0  1  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  0  2  0  0  0  0  0  0  0  0
##   105000  0  0  4  4  0  1  1  0  0  0  0  0  1
##   106000  0  0  0  0  0  0  2  0  1  0  0  0  0
##   107000  0  0  1  0  0  0  0  0  0  0  0  0  0
##   107300  0  0  1  0  0  0  0  0  0  0  0  0  0
##   107500  0  0  0  1  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  1  1  0  0  0  0  0  0  0  0
##   110000  0  0  0  0  0  0  1  0  0  0  0  0  0
##   112000  0  0  1  0  0  0  1  0  0  0  0  0  1
##   115000  0  2  0  1  2  0  0  0  0  0  0  0  0
##   118000  0  0  0  0  0  0  0  0  0  0  1  0  0
##   120000  0  0  0  1  0  2  0  0  1  0  0  0  0
##   126710  0  0  0  1  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  1  0  0  0  0  0  0  0  0
##   145800  0  0  1  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  1  0
##   162000  0  1  0  0  0  0  0  0  0  0  0  0  0
##   220000  0  0  0  0  0  0  0  0  0  0  0  1  0

# Pearson chi-squared test of independence on the salary-by-work-experience
# table.
chisq.test(x = mytable3)

## Warning in chisq.test(mytable3): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  mytable3
## X-squared = 773.16, df = 516, p-value = 1.433e-12

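We reject the null hypothesis of independence: salary is associated with years of work experience (p = 1.433e-12), although R warns that the chi-squared approximation may be incorrect.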

regression

# Linear regression of salary on age, sex, the four GMAT measures, work
# experience and satisfaction, followed by the coefficient summary.
# NOTE(review): this is fitted on the full data set, which still contains
# salary values 0/998/999 and satis value 998 -- confirm whether those are
# placeholder codes that should be excluded before modelling.
fit <- lm(
  salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    work_yrs + satis,
  data = MBA_Starting_Salaries_Data_
)
summary(fit)

## 
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + 
##     gmat_tpc + work_yrs + satis, data = MBA_Starting_Salaries_Data_)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -64763 -45163  -3221  43605 186789 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 214629.575  65128.160   3.295  0.00112 ** 
## age          -3948.105   1530.064  -2.580  0.01041 *  
## sex           3730.050   6834.469   0.546  0.58568    
## gmat_tot      -299.779    209.269  -1.433  0.15318    
## gmat_qpc       338.156    582.841   0.580  0.56228    
## gmat_vpc       475.523    527.377   0.902  0.36805    
## gmat_tpc       522.828    417.568   1.252  0.21164    
## work_yrs      3391.302   1754.338   1.933  0.05429 .  
## satis          -47.715      7.856  -6.073 4.33e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47600 on 265 degrees of freedom
## Multiple R-squared:  0.1528, Adjusted R-squared:  0.1272 
## F-statistic: 5.974 on 8 and 265 DF,  p-value: 4.728e-07

# Linear regression of salary on all twelve other columns of the data set,
# followed by the coefficient summary.
fit1 <- lm(
  salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + work_yrs + satis + quarter + frstlang,
  data = MBA_Starting_Salaries_Data_
)
summary(fit1)

## 
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + 
##     gmat_tpc + s_avg + f_avg + work_yrs + satis + quarter + frstlang, 
##     data = MBA_Starting_Salaries_Data_)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -77353 -42055  -4193  43432 204537 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 215476.47   74731.74   2.883  0.00426 ** 
## age          -3841.82    1578.26  -2.434  0.01560 *  
## sex           1810.69    6853.24   0.264  0.79183    
## gmat_tot      -278.09     209.44  -1.328  0.18540    
## gmat_qpc       334.82     578.64   0.579  0.56333    
## gmat_vpc       294.13     550.38   0.534  0.59351    
## gmat_tpc       512.59     417.03   1.229  0.22012    
## s_avg        12836.84   12919.75   0.994  0.32135    
## f_avg        -6371.67    6636.87  -0.960  0.33792    
## work_yrs      2881.46    1784.27   1.615  0.10753    
## satis          -47.17       7.84  -6.016 6.01e-09 ***
## quarter      -5443.82    4050.86  -1.344  0.18016    
## frstlang     -3058.63   10365.65  -0.295  0.76817    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47130 on 261 degrees of freedom
## Multiple R-squared:  0.1818, Adjusted R-squared:  0.1442 
## F-statistic: 4.834 on 12 and 261 DF,  p-value: 3.555e-07

# Reduced linear regression of salary on age, sex, total GMAT score, work
# experience, satisfaction and first language, followed by its summary.
fit2 <- lm(
  salary ~ age + sex + gmat_tot + work_yrs + satis + frstlang,
  data = MBA_Starting_Salaries_Data_
)
summary(fit2)

## 
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + work_yrs + satis + 
##     frstlang, data = MBA_Starting_Salaries_Data_)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -63826 -44997  -2531  43591 184589 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 163652.153  50074.980   3.268  0.00122 ** 
## age          -4039.139   1543.341  -2.617  0.00937 ** 
## sex           5030.505   6696.183   0.751  0.45316    
## gmat_tot       -29.586     51.836  -0.571  0.56865    
## work_yrs      3428.567   1777.157   1.929  0.05476 .  
## satis          -46.713      7.901  -5.912 1.03e-08 ***
## frstlang     -6545.915   9235.808  -0.709  0.47910    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47700 on 267 degrees of freedom
## Multiple R-squared:  0.1429, Adjusted R-squared:  0.1237 
## F-statistic: 7.421 on 6 and 267 DF,  p-value: 2.322e-07

# Smallest model: salary regressed on age, work experience and satisfaction
# only, followed by the coefficient summary.
fit3 <- lm(
  salary ~ age + work_yrs + satis,
  data = MBA_Starting_Salaries_Data_
)
summary(fit3)

## 
## Call:
## lm(formula = salary ~ age + work_yrs + satis, data = MBA_Starting_Salaries_Data_)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -63123 -46260  -1564  45467 185324 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 149985.482  36096.495   4.155 4.37e-05 ***
## age          -4287.696   1514.706  -2.831  0.00499 ** 
## work_yrs      3765.669   1734.895   2.171  0.03084 *  
## satis          -47.858      7.808  -6.129 3.12e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47550 on 270 degrees of freedom
## Multiple R-squared:  0.1387, Adjusted R-squared:  0.1291 
## F-statistic: 14.49 on 3 and 270 DF,  p-value: 8.813e-09

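The overall F-test p-value (8.813e-09) is far below 0.05, so the model as a whole is statistically significant. However, the adjusted R-squared of 0.1291 shows that it explains only about 13% of the variation in salary.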

Mba salary

Subarna N

February 20, 2018