# Load the MBA starting-salaries survey and take a first look at it.
# NOTE(review): the working directory is hard-coded to one machine; it is kept
# here so that any later relative paths keep resolving the same way.
setwd("C:/Users/Kalyan/Downloads")
mbasal <- read.csv("MBA Starting Salaries Data.csv")

# View() opens a spreadsheet-style viewer, which only makes sense in an
# interactive session; skip it when the script is run in batch mode.
if (interactive()) {
  View(mbasal)
}

# Per-column five-number summary plus mean.
summary(mbasal)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :16.00 Min. : 0.0 Min. :2.000 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.0 1st Qu.:2.708 1st Qu.:2.750
## Median :81.00 Median :87.0 Median :3.000 Median :3.000
## Mean :78.32 Mean :84.2 Mean :3.025 Mean :3.062
## 3rd Qu.:91.00 3rd Qu.:94.0 3rd Qu.:3.300 3rd Qu.:3.250
## Max. :99.00 Max. :99.0 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 0
## 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 0
## Median :2.000 Median : 3.000 Median :1.000 Median : 999
## Mean :2.478 Mean : 3.872 Mean :1.117 Mean : 39026
## 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.: 97000
## Max. :4.000 Max. :22.000 Max. :2.000 Max. :220000
## satis
## Min. : 1.0
## 1st Qu.: 5.0
## Median : 6.0
## Mean :172.2
## 3rd Qu.: 7.0
## Max. :998.0
# Show the column types and the first few values of the raw survey data.
utils::str(mbasal)
## 'data.frame': 274 obs. of 13 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : int 2 1 1 1 2 1 1 2 1 1 ...
## $ gmat_tot: int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc: int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc: int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc: int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang: int 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
# psych::describe() reports n, mean, sd, median, trimmed mean, mad, min, max,
# range, skew, kurtosis and standard error for every column.
library(psych)
psych::describe(mbasal)
## vars n mean sd median trimmed mad min max
## age 1 274 27.36 3.71 27 26.76 2.97 22 48
## sex 2 274 1.25 0.43 1 1.19 0.00 1 2
## gmat_tot 3 274 619.45 57.54 620 618.86 59.30 450 790
## gmat_qpc 4 274 80.64 14.87 83 82.31 14.83 28 99
## gmat_vpc 5 274 78.32 16.86 81 80.33 14.83 16 99
## gmat_tpc 6 274 84.20 14.02 87 86.12 11.86 0 99
## s_avg 7 274 3.03 0.38 3 3.03 0.44 2 4
## f_avg 8 274 3.06 0.53 3 3.09 0.37 0 4
## quarter 9 274 2.48 1.11 2 2.47 1.48 1 4
## work_yrs 10 274 3.87 3.23 3 3.29 1.48 0 22
## frstlang 11 274 1.12 0.32 1 1.02 0.00 1 2
## salary 12 274 39025.69 50951.56 999 33607.86 1481.12 0 220000
## satis 13 274 172.18 371.61 6 91.50 1.48 1 998
## range skew kurtosis se
## age 26 2.16 6.45 0.22
## sex 1 1.16 -0.66 0.03
## gmat_tot 340 -0.01 0.06 3.48
## gmat_qpc 71 -0.92 0.30 0.90
## gmat_vpc 83 -1.04 0.74 1.02
## gmat_tpc 99 -2.28 9.02 0.85
## s_avg 2 -0.06 -0.38 0.02
## f_avg 4 -2.08 10.85 0.03
## quarter 3 0.02 -1.35 0.07
## work_yrs 22 2.78 9.80 0.20
## frstlang 1 2.37 3.65 0.02
## salary 220000 0.70 -1.05 3078.10
## satis 997 1.77 1.13 22.45
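Some people did not answer the survey or did not disclose their salary (salary coded as 998 or 999), so we need to eliminate their responses. Rows with a salary of 0 are removed as well, leaving only people with a reported salary.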
# Keep only rows with a real salary figure: 998 and 999 are sentinel codes and
# 0 is not an actual salary, so all three values are excluded.
mbaproper <- subset(
  mbasal,
  salary != 998 & salary != 999 & salary != 0
)

# Summary of the filtered data.
summary(mbaproper)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :500 Min. :39.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580 1st Qu.:72.00
## Median :26.00 Median :1.000 Median :620 Median :82.00
## Mean :26.78 Mean :1.301 Mean :616 Mean :79.73
## 3rd Qu.:28.00 3rd Qu.:2.000 3rd Qu.:655 3rd Qu.:89.00
## Max. :40.00 Max. :2.000 Max. :720 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :30.00 Min. :51.00 Min. :2.200 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.00 1st Qu.:2.850 1st Qu.:2.915
## Median :81.00 Median :87.00 Median :3.100 Median :3.250
## Mean :78.56 Mean :84.52 Mean :3.092 Mean :3.091
## 3rd Qu.:92.00 3rd Qu.:93.50 3rd Qu.:3.400 3rd Qu.:3.415
## Max. :99.00 Max. :99.00 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.00 Min. :1.000 Min. : 64000
## 1st Qu.:1.000 1st Qu.: 2.00 1st Qu.:1.000 1st Qu.: 95000
## Median :2.000 Median : 3.00 Median :1.000 Median :100000
## Mean :2.262 Mean : 3.68 Mean :1.068 Mean :103031
## 3rd Qu.:3.000 3rd Qu.: 4.00 3rd Qu.:1.000 3rd Qu.:106000
## Max. :4.000 Max. :16.00 Max. :2.000 Max. :220000
## satis
## Min. :3.000
## 1st Qu.:5.000
## Median :6.000
## Mean :5.883
## 3rd Qu.:6.000
## Max. :7.000
# Column types and leading values of the filtered data.
utils::str(mbaproper)
## 'data.frame': 103 obs. of 13 variables:
## $ age : int 22 27 25 25 27 28 24 25 25 25 ...
## $ sex : int 2 2 2 2 1 2 1 2 2 1 ...
## $ gmat_tot: int 660 700 680 650 710 620 670 560 530 650 ...
## $ gmat_qpc: int 90 94 87 82 96 52 84 52 50 79 ...
## $ gmat_vpc: int 92 98 96 91 96 98 96 81 62 93 ...
## $ gmat_tpc: int 94 98 96 93 98 87 95 72 61 93 ...
## $ s_avg : num 3.5 3.3 3.5 3.4 3.3 3.4 3.3 3.3 3.6 3.3 ...
## $ f_avg : num 3.75 3.25 2.67 3.25 3.5 3.75 3.25 3.5 3.67 3.5 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 1 2 2 3 2 5 0 1 3 1 ...
## $ frstlang: int 1 1 1 1 1 1 1 1 1 1 ...
## $ salary : int 85000 85000 86000 88000 92000 93000 95000 95000 95000 96000 ...
## $ satis : int 5 6 5 7 6 5 4 5 3 7 ...
# Extended descriptive statistics (psych package) for the filtered data.
psych::describe(mbaproper)
## vars n mean sd median trimmed mad min
## age 1 103 26.78 3.27 2.60e+01 26.30 2.97 22.0
## sex 2 103 1.30 0.46 1.00e+00 1.25 0.00 1.0
## gmat_tot 3 103 616.02 50.69 6.20e+02 615.90 59.30 500.0
## gmat_qpc 4 103 79.73 13.39 8.20e+01 81.05 13.34 39.0
## gmat_vpc 5 103 78.56 16.14 8.10e+01 80.33 16.31 30.0
## gmat_tpc 6 103 84.52 11.01 8.70e+01 85.60 11.86 51.0
## s_avg 7 103 3.09 0.38 3.10e+00 3.10 0.44 2.2
## f_avg 8 103 3.09 0.49 3.25e+00 3.13 0.37 0.0
## quarter 9 103 2.26 1.12 2.00e+00 2.20 1.48 1.0
## work_yrs 10 103 3.68 3.01 3.00e+00 3.11 1.48 0.0
## frstlang 11 103 1.07 0.25 1.00e+00 1.00 0.00 1.0
## salary 12 103 103030.74 17868.80 1.00e+05 101065.06 7413.00 64000.0
## satis 13 103 5.88 0.78 6.00e+00 5.89 1.48 3.0
## max range skew kurtosis se
## age 40 18.0 1.92 4.90 0.32
## sex 2 1.0 0.86 -1.28 0.05
## gmat_tot 720 220.0 0.01 -0.69 4.99
## gmat_qpc 99 60.0 -0.81 0.17 1.32
## gmat_vpc 99 69.0 -0.87 0.21 1.59
## gmat_tpc 99 48.0 -0.84 0.19 1.08
## s_avg 4 1.8 -0.13 -0.61 0.04
## f_avg 4 4.0 -2.52 13.86 0.05
## quarter 4 3.0 0.27 -1.34 0.11
## work_yrs 16 16.0 2.48 6.83 0.30
## frstlang 2 1.0 3.38 9.54 0.02
## salary 220000 156000.0 3.18 17.16 1760.67
## satis 7 4.0 -0.40 0.44 0.08
For age
# Distribution of age: horizontal boxplot followed by a histogram.
boxplot(
  mbaproper[["age"]],
  horizontal = TRUE,
  main = "Age",
  xlab = "years",
  col = "yellow"
)
hist(
  mbaproper[["age"]],
  main = "Age",
  xlab = "years",
  col = "yellow"
)
For gmat total
# Distribution of the GMAT total score: horizontal boxplot, then histogram.
boxplot(
  mbaproper[["gmat_tot"]],
  horizontal = TRUE,
  main = "Gmat total",
  xlab = "total score",
  col = "yellow"
)
hist(
  mbaproper[["gmat_tot"]],
  main = "Gmat total score",
  xlab = "Gmat total score",
  col = "yellow"
)
For gmat quantitative percentile
# Distribution of the GMAT quantitative percentile: boxplot, then histogram.
boxplot(
  mbaproper[["gmat_qpc"]],
  horizontal = TRUE,
  main = "Gmat quantitative percentile",
  xlab = "Gmat quantitative percentile",
  col = "yellow"
)
hist(
  mbaproper[["gmat_qpc"]],
  main = "Gmat quantitative percentile",
  xlab = "Gmat quantitative percentile",
  col = "yellow"
)
For gmat verbal percentile
# Distribution of the GMAT verbal percentile: boxplot, then histogram.
boxplot(
  mbaproper[["gmat_vpc"]],
  horizontal = TRUE,
  main = "Gmat verbal percentile",
  xlab = "Gmat verbal percentile",
  col = "yellow"
)
hist(
  mbaproper[["gmat_vpc"]],
  main = "Gmat verbal percentile",
  xlab = "Gmat verbal percentile",
  col = "yellow"
)
For gmat total percentile
# Distribution of the GMAT total percentile: boxplot, then histogram.
boxplot(
  mbaproper[["gmat_tpc"]],
  horizontal = TRUE,
  main = "Gmat total percentile",
  xlab = "Gmat total percentile",
  col = "yellow"
)
hist(
  mbaproper[["gmat_tpc"]],
  main = "Gmat total percentile",
  xlab = "Gmat total percentile",
  col = "yellow"
)
For spring MBA average
# Distribution of the spring MBA average: boxplot, then histogram.
boxplot(
  mbaproper[["s_avg"]],
  horizontal = TRUE,
  main = "Spring MBA average",
  xlab = "spring MBA average",
  col = "yellow"
)
hist(
  mbaproper[["s_avg"]],
  main = "Spring MBA average",
  xlab = "spring MBA average",
  col = "yellow"
)
For fall MBA average
# Distribution of the fall MBA average: boxplot, then histogram.
boxplot(
  mbaproper[["f_avg"]],
  horizontal = TRUE,
  main = "Fall MBA average",
  xlab = "fall MBA average",
  col = "yellow"
)
hist(
  mbaproper[["f_avg"]],
  main = "Fall MBA average",
  xlab = "fall MBA average",
  col = "yellow"
)
Let us visualize the questions from the case study.
# Pairwise scatterplots of the numeric measures against each other and salary.
library(car)
scatterplotMatrix(
  formula = ~ age + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + work_yrs + salary,
  data = mbaproper
)

# Corrgram of every column: shaded cells below the diagonal, pies above it,
# variable names on the diagonal, columns kept in their original order.
library(corrgram)
corrgram(
  mbaproper,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of variables in MBA starting salaries "
)

# Correlation matrix of all columns (stats::cor default method).
stats::cor(mbaproper)
## age sex gmat_tot gmat_qpc gmat_vpc
## age 1.00000000 -0.14352927 -0.07871678 -0.165039057 0.01799420
## sex -0.14352927 1.00000000 -0.01955548 -0.147099027 0.05341428
## gmat_tot -0.07871678 -0.01955548 1.00000000 0.666382266 0.78038546
## gmat_qpc -0.16503906 -0.14709903 0.66638227 1.000000000 0.09466541
## gmat_vpc 0.01799420 0.05341428 0.78038546 0.094665411 1.00000000
## gmat_tpc -0.09609156 -0.04686981 0.96680810 0.658650025 0.78443167
## s_avg 0.15654954 0.08079985 0.17198874 0.015471662 0.15865101
## f_avg -0.21699191 0.16572186 0.12246257 0.098418869 0.02290167
## quarter -0.12568145 -0.02139041 -0.10578964 0.012648346 -0.12862079
## work_yrs 0.88052470 -0.09233003 -0.12280018 -0.182701263 -0.02812182
## frstlang 0.35026743 0.07512009 -0.13164323 0.014198516 -0.21835333
## salary 0.49964284 -0.16628869 -0.09067141 0.014141299 -0.13743230
## satis 0.10832308 -0.09199534 0.06474206 -0.003984632 0.14863481
## gmat_tpc s_avg f_avg quarter work_yrs
## age -0.09609156 0.15654954 -0.21699191 -0.12568145 0.88052470
## sex -0.04686981 0.08079985 0.16572186 -0.02139041 -0.09233003
## gmat_tot 0.96680810 0.17198874 0.12246257 -0.10578964 -0.12280018
## gmat_qpc 0.65865003 0.01547166 0.09841887 0.01264835 -0.18270126
## gmat_vpc 0.78443167 0.15865101 0.02290167 -0.12862079 -0.02812182
## gmat_tpc 1.00000000 0.13938500 0.07051391 -0.09955033 -0.13246963
## s_avg 0.13938500 1.00000000 0.44590413 -0.84038355 0.16328236
## f_avg 0.07051391 0.44590413 1.00000000 -0.43144819 -0.21633018
## quarter -0.09955033 -0.84038355 -0.43144819 1.00000000 -0.12896722
## work_yrs -0.13246963 0.16328236 -0.21633018 -0.12896722 1.00000000
## frstlang -0.16437561 -0.13788905 -0.05061394 0.10955726 0.19627277
## salary -0.13201783 0.10173175 -0.10603897 -0.12848526 0.45466634
## satis 0.11630842 -0.14356557 -0.11773304 0.22511985 0.06299926
## frstlang salary satis
## age 0.35026743 0.49964284 0.108323083
## sex 0.07512009 -0.16628869 -0.091995338
## gmat_tot -0.13164323 -0.09067141 0.064742057
## gmat_qpc 0.01419852 0.01414130 -0.003984632
## gmat_vpc -0.21835333 -0.13743230 0.148634805
## gmat_tpc -0.16437561 -0.13201783 0.116308417
## s_avg -0.13788905 0.10173175 -0.143565573
## f_avg -0.05061394 -0.10603897 -0.117733043
## quarter 0.10955726 -0.12848526 0.225119851
## work_yrs 0.19627277 0.45466634 0.062999256
## frstlang 1.00000000 0.26701953 0.089834769
## salary 0.26701953 1.00000000 -0.040050600
## satis 0.08983477 -0.04005060 1.000000000
Covariance matrix
# Covariance matrix of all columns in the filtered data.
stats::cov(mbaproper)
## age sex gmat_tot gmat_qpc
## age 10.7045498 -2.164477e-01 -1.305445e+01 -7.22796497
## sex -0.2164477 2.124500e-01 -4.568818e-01 -0.90757662
## gmat_tot -13.0544451 -4.568818e-01 2.569294e+03 452.14258519
## gmat_qpc -7.2279650 -9.075766e-01 4.521426e+02 179.18027794
## gmat_vpc 0.9505045 3.974872e-01 6.386360e+02 20.45849990
## gmat_tpc -3.4602132 -2.377689e-01 5.393623e+02 97.03607462
## s_avg 0.1938587 1.409575e-02 3.299562e+00 0.07838473
## f_avg -0.3462517 3.725395e-02 3.027432e+00 0.64252142
## quarter -0.4604988 -1.104131e-02 -6.005140e+00 0.18960594
## work_yrs 8.6728536 -1.281173e-01 -1.873882e+01 -7.36245955
## frstlang 0.2898344 8.756901e-03 -1.687607e+00 0.04806777
## salary 29210.5193223 -1.369577e+03 -8.212449e+04 3382.43784504
## satis 0.2776509 -3.321911e-02 2.570912e+00 -0.04178565
## gmat_vpc gmat_tpc s_avg f_avg
## age 9.505045e-01 -3.460213e+00 0.19385875 -3.462517e-01
## sex 3.974872e-01 -2.377689e-01 0.01409575 3.725395e-02
## gmat_tot 6.386360e+02 5.393623e+02 3.29956215 3.027432e+00
## gmat_qpc 2.045850e+01 9.703607e+01 0.07838473 6.425214e-01
## gmat_vpc 2.606602e+02 1.393882e+02 0.96945936 1.803303e-01
## gmat_tpc 1.393882e+02 1.211342e+02 0.58062916 3.785056e-01
## s_avg 9.694594e-01 5.806292e-01 0.14325138 8.231046e-02
## f_avg 1.803303e-01 3.785056e-01 0.08231046 2.378638e-01
## quarter -2.325528e+00 -1.227013e+00 -0.35620503 -2.356492e-01
## work_yrs -1.366838e+00 -4.389206e+00 0.18604797 -3.176271e-01
## frstlang -8.915858e-01 -4.575481e-01 -0.01319912 -6.243099e-03
## salary -3.964803e+04 -2.596339e+04 688.02042071 -9.241129e+02
## satis 1.879973e+00 1.002856e+00 -0.04256901 -4.498382e-02
## quarter work_yrs frstlang salary
## age -4.604988e-01 8.6728536 2.898344e-01 2.921052e+04
## sex -1.104131e-02 -0.1281173 8.756901e-03 -1.369577e+03
## gmat_tot -6.005140e+00 -18.7388159 -1.687607e+00 -8.212449e+04
## gmat_qpc 1.896059e-01 -7.3624595 4.806777e-02 3.382438e+03
## gmat_vpc -2.325528e+00 -1.3668380 -8.915858e-01 -3.964803e+04
## gmat_tpc -1.227013e+00 -4.3892062 -4.575481e-01 -2.596339e+04
## s_avg -3.562050e-01 0.1860480 -1.319912e-02 6.880204e+02
## f_avg -2.356492e-01 -0.3176271 -6.243099e-03 -9.241129e+02
## quarter 1.254140e+00 -0.4347992 3.102989e-02 -2.571117e+03
## work_yrs -4.347992e-01 9.0630116 1.494384e-01 2.445820e+04
## frstlang 3.102989e-02 0.1494384 6.396345e-02 1.206714e+03
## salary -2.571117e+03 24458.1995050 1.206714e+03 3.192940e+08
## satis 1.975062e-01 0.1485818 1.779935e-02 -5.606583e+02
## satis
## age 0.27765087
## sex -0.03321911
## gmat_tot 2.57091186
## gmat_qpc -0.04178565
## gmat_vpc 1.87997335
## gmat_tpc 1.00285551
## s_avg -0.04256901
## f_avg -0.04498382
## quarter 0.19750619
## work_yrs 0.14858176
## frstlang 0.01779935
## salary -560.65829050
## satis 0.61374453
# Two-sample t-test of salary across the two sex groups. The call is left
# exactly as written because its text appears in the printed "data:" label.
t.test(mbaproper$salary~mbaproper$sex)
##
## Welch Two Sample t-test
##
## data: mbaproper$salary by mbaproper$sex
## t = 1.3628, df = 38.115, p-value = 0.1809
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3128.55 16021.72
## sample estimates:
## mean in group 1 mean in group 2
## 104970.97 98524.39
# Two-sample t-test of salary across the two first-language groups. The call
# is left exactly as written because its text appears in the printed "data:"
# label.
t.test(mbaproper$salary~mbaproper$frstlang)
##
## Welch Two Sample t-test
##
## data: mbaproper$salary by mbaproper$frstlang
## t = -1.1202, df = 6.0863, p-value = 0.3049
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -59933.62 22202.25
## sample estimates:
## mean in group 1 mean in group 2
## 101748.6 120614.3
Since p > 0.05 in both cases, we fail to reject the null hypothesis: there is no significant evidence that mean salary differs by gender or by first language.
# Model 1: salary regressed on age, the four GMAT measures, both MBA averages,
# quarter, work experience and satisfaction.
fit1 <- lm(
  salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
    s_avg + f_avg + quarter + work_yrs + satis,
  data = mbaproper
)
summary(fit1)
##
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc +
## gmat_vpc + s_avg + f_avg + quarter + work_yrs + satis, data = mbaproper)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26196 -8241 -324 5297 70000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69019.43 52376.30 1.318 0.1909
## age 2379.27 1004.19 2.369 0.0199 *
## gmat_tot 29.52 176.18 0.168 0.8673
## gmat_qpc 813.29 492.44 1.652 0.1020
## gmat_tpc -1479.96 713.20 -2.075 0.0408 *
## gmat_vpc 489.93 495.74 0.988 0.3256
## s_avg -3124.32 8046.45 -0.388 0.6987
## f_avg -2345.08 3855.93 -0.608 0.5446
## quarter -2787.20 2694.67 -1.034 0.3037
## work_yrs 360.74 1087.30 0.332 0.7408
## satis -719.58 2136.17 -0.337 0.7370
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15410 on 92 degrees of freedom
## Multiple R-squared: 0.3296, Adjusted R-squared: 0.2567
## F-statistic: 4.523 on 10 and 92 DF, p-value: 3.341e-05
# Model 2: the same predictors as model 1 with quarter removed.
fit2 <- lm(
  salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
    s_avg + f_avg + work_yrs + satis,
  data = mbaproper
)
summary(fit2)
##
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc +
## gmat_vpc + s_avg + f_avg + work_yrs + satis, data = mbaproper)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29429 -7405 358 5528 69521
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54916.31 50589.39 1.086 0.2805
## age 2437.73 1002.98 2.430 0.0170 *
## gmat_tot -21.21 169.27 -0.125 0.9005
## gmat_qpc 891.50 486.78 1.831 0.0702 .
## gmat_tpc -1419.88 711.10 -1.997 0.0488 *
## gmat_vpc 579.60 488.29 1.187 0.2382
## s_avg 3460.17 4923.28 0.703 0.4839
## f_avg -1642.93 3797.13 -0.433 0.6663
## work_yrs 338.47 1087.50 0.311 0.7563
## satis -1284.40 2065.97 -0.622 0.5357
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15410 on 93 degrees of freedom
## Multiple R-squared: 0.3218, Adjusted R-squared: 0.2562
## F-statistic: 4.903 on 9 and 93 DF, p-value: 2.219e-05
# Model 3: age, the four GMAT measures, both MBA averages and work experience
# (neither quarter nor satisfaction).
fit3 <- lm(
  salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
    s_avg + f_avg + work_yrs,
  data = mbaproper
)
summary(fit3)
##
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc +
## gmat_vpc + s_avg + f_avg + work_yrs, data = mbaproper)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31270 -7556 665 5143 69407
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43964.6689 47268.2859 0.930 0.3547
## age 2363.9733 992.6810 2.381 0.0193 *
## gmat_tot -0.7838 165.5112 -0.005 0.9962
## gmat_qpc 868.1298 483.7421 1.795 0.0759 .
## gmat_tpc -1467.2112 704.7030 -2.082 0.0401 *
## gmat_vpc 546.2923 483.7512 1.129 0.2617
## s_avg 3895.8927 4857.2091 0.802 0.4245
## f_avg -1709.7057 3783.2060 -0.452 0.6524
## work_yrs 372.0196 1082.6051 0.344 0.7319
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15360 on 94 degrees of freedom
## Multiple R-squared: 0.319, Adjusted R-squared: 0.261
## F-statistic: 5.503 on 8 and 94 DF, p-value: 1.05e-05
# Model 4: age, the four GMAT measures and work experience only.
fit4 <- lm(
  salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc + work_yrs,
  data = mbaproper
)
summary(fit4)
##
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc +
## gmat_vpc + work_yrs, data = mbaproper)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29602 -7617 329 5510 66763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44161.95 46928.83 0.941 0.3490
## age 2412.78 981.86 2.457 0.0158 *
## gmat_tot 12.71 157.04 0.081 0.9357
## gmat_qpc 810.35 468.98 1.728 0.0872 .
## gmat_tpc -1411.33 695.03 -2.031 0.0451 *
## gmat_vpc 501.51 470.60 1.066 0.2892
## work_yrs 466.48 1067.93 0.437 0.6632
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15250 on 96 degrees of freedom
## Multiple R-squared: 0.3143, Adjusted R-squared: 0.2714
## F-statistic: 7.333 on 6 and 96 DF, p-value: 1.806e-06
# Model 5: the smallest model, with age, GMAT total and work experience.
fit5 <- lm(
  salary ~ age + gmat_tot + work_yrs,
  data = mbaproper
)
summary(fit5)
##
## Call:
## lm(formula = salary ~ age + gmat_tot + work_yrs, data = mbaproper)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32657 -8150 -2117 4705 78974
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46876.35 29418.14 1.593 0.1142
## age 2448.62 1002.87 2.442 0.0164 *
## gmat_tot -17.19 30.92 -0.556 0.5795
## work_yrs 319.93 1094.82 0.292 0.7707
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15680 on 99 degrees of freedom
## Multiple R-squared: 0.2529, Adjusted R-squared: 0.2303
## F-statistic: 11.17 on 3 and 99 DF, p-value: 2.228e-06
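So we can see that the first model has the highest multiple R-squared (0.3296) among these fits, although the fourth model has the highest adjusted R-squared (0.2714) once the number of predictors is taken into account.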
# Placement data: drop only the 998/999 salary codes, so rows with salary 0
# stay in the data.
mba <- subset(mbasal, salary != 998 & salary != 999)

# placed is 0 when salary is 0 and 1 otherwise.
mba$placed <- ifelse(mba$salary == 0, 0, 1)

# Contingency table of sex against placement.
model1 <- xtabs(formula = ~ sex + placed, data = mba)
model1
## placed
## sex 0 1
## 1 67 72
## 2 23 31
# Chi-squared test of independence on the sex-by-placed table. The call is
# left exactly as written because its text is echoed in the printed output.
chisq.test(model1)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: model1
## X-squared = 0.29208, df = 1, p-value = 0.5889
As p > 0.05, we fail to reject the null hypothesis: there is no significant evidence that placement depends on gender.
# Contingency table of age (one row per distinct age) against placement.
model2 <- xtabs(formula = ~ age + placed, data = mba)
model2
## placed
## age 0 1
## 22 1 1
## 23 3 5
## 24 13 16
## 25 9 23
## 26 10 14
## 27 14 14
## 28 6 8
## 29 11 6
## 30 2 6
## 31 2 4
## 32 5 1
## 33 0 1
## 34 3 1
## 35 3 0
## 36 2 0
## 37 1 0
## 39 1 1
## 40 0 2
## 42 1 0
## 43 2 0
## 48 1 0
# Chi-squared test of independence on the age-by-placed table. The call is
# left exactly as written because its text is echoed in the printed warning
# and "data:" label.
# NOTE(review): the recorded run warns that the chi-squared approximation may
# be incorrect for this table.
chisq.test(model2)
## Warning in chisq.test(model2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: model2
## X-squared = 27.943, df = 20, p-value = 0.1108
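As p > 0.05, we fail to reject the null hypothesis: there is no significant evidence that placement depends on age. (Note the warning above: the chi-squared approximation may be unreliable for this table.)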
# Contingency table of spring MBA average (one row per distinct value)
# against placement.
model3 <- xtabs(formula = ~ s_avg + placed, data = mba)
model3
## placed
## s_avg 0 1
## 2 1 0
## 2.1 2 0
## 2.2 1 1
## 2.3 2 1
## 2.4 2 4
## 2.5 0 3
## 2.6 1 6
## 2.7 8 4
## 2.8 9 7
## 2.82 1 0
## 2.9 9 13
## 2.91 0 1
## 3 10 6
## 3.08 1 0
## 3.09 2 2
## 3.1 6 7
## 3.17 1 0
## 3.2 4 9
## 3.25 1 0
## 3.27 2 1
## 3.3 9 11
## 3.38 1 0
## 3.4 7 5
## 3.45 1 1
## 3.5 2 10
## 3.6 4 6
## 3.64 1 0
## 3.7 0 2
## 3.8 1 2
## 3.9 1 0
## 4 0 1
# Chi-squared test of independence on the s_avg-by-placed table. The call is
# left exactly as written because its text is echoed in the printed warning
# and "data:" label.
# NOTE(review): the recorded run warns that the chi-squared approximation may
# be incorrect for this table.
chisq.test(model3)
## Warning in chisq.test(model3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: model3
## X-squared = 33.09, df = 30, p-value = 0.3187
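As p > 0.05, we fail to reject the null hypothesis: there is no significant evidence that placement depends on the spring average. (The same warning about the chi-squared approximation applies here.)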
# Contingency table of fall MBA average (one row per distinct value)
# against placement.
model4 <- xtabs(formula = ~ f_avg + placed, data = mba)
model4
## placed
## f_avg 0 1
## 0 1 1
## 2 3 2
## 2.25 2 1
## 2.5 8 5
## 2.67 1 1
## 2.75 9 15
## 2.83 0 1
## 3 24 25
## 3.17 1 0
## 3.2 1 0
## 3.25 18 25
## 3.33 1 1
## 3.4 1 0
## 3.5 7 17
## 3.6 1 2
## 3.67 1 2
## 3.75 6 3
## 3.83 1 0
## 4 4 2
# Chi-squared test of independence on the f_avg-by-placed table. The call is
# left exactly as written because its text is echoed in the printed warning
# and "data:" label.
# NOTE(review): the recorded run warns that the chi-squared approximation may
# be incorrect for this table.
chisq.test(model4)
## Warning in chisq.test(model4): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: model4
## X-squared = 14.576, df = 18, p-value = 0.6908
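As p > 0.05, we fail to reject the null hypothesis: there is no significant evidence that placement depends on the fall average (again subject to the warning about the chi-squared approximation). Hence placement shows no significant association with any of the factors tested here.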