# Load the MBA starting-salaries data from the current working directory.
mba <- read.csv("MBA Starting Salaries Data.csv")

# Keep only rows with a usable salary figure. Salaries of 0, 998 and 999 are
# excluded; presumably these are placeholder codes rather than real salaries
# -- TODO confirm against the data dictionary. Rows with a missing (NA)
# salary are dropped too.
has_salary <- !is.na(mba$salary) & !(mba$salary %in% c(0, 998, 999))
mbac <- mba[has_salary, ]

# View() opens a data viewer, so only call it when a user is present.
if (interactive()) {
  View(mba)
}

# Per-column summary statistics of the cleaned data.
summary(mbac)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :500 Min. :39.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580 1st Qu.:72.00
## Median :26.00 Median :1.000 Median :620 Median :82.00
## Mean :26.78 Mean :1.301 Mean :616 Mean :79.73
## 3rd Qu.:28.00 3rd Qu.:2.000 3rd Qu.:655 3rd Qu.:89.00
## Max. :40.00 Max. :2.000 Max. :720 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :30.00 Min. :51.00 Min. :2.200 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.00 1st Qu.:2.850 1st Qu.:2.915
## Median :81.00 Median :87.00 Median :3.100 Median :3.250
## Mean :78.56 Mean :84.52 Mean :3.092 Mean :3.091
## 3rd Qu.:92.00 3rd Qu.:93.50 3rd Qu.:3.400 3rd Qu.:3.415
## Max. :99.00 Max. :99.00 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.00 Min. :1.000 Min. : 64000
## 1st Qu.:1.000 1st Qu.: 2.00 1st Qu.:1.000 1st Qu.: 95000
## Median :2.000 Median : 3.00 Median :1.000 Median :100000
## Mean :2.262 Mean : 3.68 Mean :1.068 Mean :103031
## 3rd Qu.:3.000 3rd Qu.: 4.00 3rd Qu.:1.000 3rd Qu.:106000
## Max. :4.000 Max. :16.00 Max. :2.000 Max. :220000
## satis
## Min. :3.000
## 1st Qu.:5.000
## Median :6.000
## Mean :5.883
## 3rd Qu.:6.000
## Max. :7.000
# Horizontal boxplots of individual columns of the cleaned data (mbac).
# The section headings are kept as comments so the script parses as R.

# Age
boxplot(mbac$age, horizontal = TRUE, main = "Age", xlab = "years")

# GMAT total
boxplot(
  mbac$gmat_tot,
  horizontal = TRUE,
  main = "Gmat total",
  xlab = "total score"
)

# GMAT total percentile
boxplot(
  mbac$gmat_tpc,
  horizontal = TRUE,
  main = "Gmat total percentile",
  xlab = "Gmat total percentile"
)

# Spring MBA average
boxplot(
  mbac$s_avg,
  horizontal = TRUE,
  main = "Spring MBA average",
  xlab = "spring MBA average"
)

# Fall MBA average
boxplot(
  mbac$f_avg,
  horizontal = TRUE,
  main = "Fall MBA average",
  xlab = "fall MBA average"
)
# Pairwise relationships between the columns of mbac: a scatterplot matrix
# (car), a corrgram (corrgram), and the numeric correlation matrix.
library(car)
scatterplotMatrix(
  formula = ~ age + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + work_yrs + salary,
  data = mbac
)

library(corrgram)
corrgram(
  mbac,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of variables in MBA starting salaries "
)

# Correlation matrix of every column in mbac.
cor(mbac)
## age sex gmat_tot gmat_qpc gmat_vpc
## age 1.00000000 -0.14352927 -0.07871678 -0.165039057 0.01799420
## sex -0.14352927 1.00000000 -0.01955548 -0.147099027 0.05341428
## gmat_tot -0.07871678 -0.01955548 1.00000000 0.666382266 0.78038546
## gmat_qpc -0.16503906 -0.14709903 0.66638227 1.000000000 0.09466541
## gmat_vpc 0.01799420 0.05341428 0.78038546 0.094665411 1.00000000
## gmat_tpc -0.09609156 -0.04686981 0.96680810 0.658650025 0.78443167
## s_avg 0.15654954 0.08079985 0.17198874 0.015471662 0.15865101
## f_avg -0.21699191 0.16572186 0.12246257 0.098418869 0.02290167
## quarter -0.12568145 -0.02139041 -0.10578964 0.012648346 -0.12862079
## work_yrs 0.88052470 -0.09233003 -0.12280018 -0.182701263 -0.02812182
## frstlang 0.35026743 0.07512009 -0.13164323 0.014198516 -0.21835333
## salary 0.49964284 -0.16628869 -0.09067141 0.014141299 -0.13743230
## satis 0.10832308 -0.09199534 0.06474206 -0.003984632 0.14863481
## gmat_tpc s_avg f_avg quarter work_yrs
## age -0.09609156 0.15654954 -0.21699191 -0.12568145 0.88052470
## sex -0.04686981 0.08079985 0.16572186 -0.02139041 -0.09233003
## gmat_tot 0.96680810 0.17198874 0.12246257 -0.10578964 -0.12280018
## gmat_qpc 0.65865003 0.01547166 0.09841887 0.01264835 -0.18270126
## gmat_vpc 0.78443167 0.15865101 0.02290167 -0.12862079 -0.02812182
## gmat_tpc 1.00000000 0.13938500 0.07051391 -0.09955033 -0.13246963
## s_avg 0.13938500 1.00000000 0.44590413 -0.84038355 0.16328236
## f_avg 0.07051391 0.44590413 1.00000000 -0.43144819 -0.21633018
## quarter -0.09955033 -0.84038355 -0.43144819 1.00000000 -0.12896722
## work_yrs -0.13246963 0.16328236 -0.21633018 -0.12896722 1.00000000
## frstlang -0.16437561 -0.13788905 -0.05061394 0.10955726 0.19627277
## salary -0.13201783 0.10173175 -0.10603897 -0.12848526 0.45466634
## satis 0.11630842 -0.14356557 -0.11773304 0.22511985 0.06299926
## frstlang salary satis
## age 0.35026743 0.49964284 0.108323083
## sex 0.07512009 -0.16628869 -0.091995338
## gmat_tot -0.13164323 -0.09067141 0.064742057
## gmat_qpc 0.01419852 0.01414130 -0.003984632
## gmat_vpc -0.21835333 -0.13743230 0.148634805
## gmat_tpc -0.16437561 -0.13201783 0.116308417
## s_avg -0.13788905 0.10173175 -0.143565573
## f_avg -0.05061394 -0.10603897 -0.117733043
## quarter 0.10955726 -0.12848526 0.225119851
## work_yrs 0.19627277 0.45466634 0.062999256
## frstlang 1.00000000 0.26701953 0.089834769
## salary 0.26701953 1.00000000 -0.040050600
## satis 0.08983477 -0.04005060 1.000000000
# Welch two-sample t-test: does mean salary differ between the two sex groups?
t.test(mbac$salary ~ mbac$sex)
##
## Welch Two Sample t-test
##
## data: mbac$salary by mbac$sex
## t = 1.3628, df = 38.115, p-value = 0.1809
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3128.55 16021.72
## sample estimates:
## mean in group 1 mean in group 2
## 104970.97 98524.39
# Welch two-sample t-test: does mean salary differ between the two frstlang
# groups?
t.test(mbac$salary ~ mbac$frstlang)
##
## Welch Two Sample t-test
##
## data: mbac$salary by mbac$frstlang
## t = -1.1202, df = 6.0863, p-value = 0.3049
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -59933.62 22202.25
## sample estimates:
## mean in group 1 mean in group 2
## 101748.6 120614.3
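# Neither t-test rejects the null hypothesis at the 5% level (p = 0.18 by sex,
# p = 0.30 by frstlang), so there is no significant evidence that mean salary
# differs between the groups.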
# Linear regression of salary on age, the four GMAT columns, s_avg, f_avg,
# quarter, work_yrs and satis, followed by its coefficient summary.
order1 <- lm(
  formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
    s_avg + f_avg + quarter + work_yrs + satis,
  data = mbac
)
summary(order1)
##
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc +
## gmat_vpc + s_avg + f_avg + quarter + work_yrs + satis, data = mbac)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26196 -8241 -324 5297 70000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69019.43 52376.30 1.318 0.1909
## age 2379.27 1004.19 2.369 0.0199 *
## gmat_tot 29.52 176.18 0.168 0.8673
## gmat_qpc 813.29 492.44 1.652 0.1020
## gmat_tpc -1479.96 713.20 -2.075 0.0408 *
## gmat_vpc 489.93 495.74 0.988 0.3256
## s_avg -3124.32 8046.45 -0.388 0.6987
## f_avg -2345.08 3855.93 -0.608 0.5446
## quarter -2787.20 2694.67 -1.034 0.3037
## work_yrs 360.74 1087.30 0.332 0.7408
## satis -719.58 2136.17 -0.337 0.7370
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15410 on 92 degrees of freedom
## Multiple R-squared: 0.3296, Adjusted R-squared: 0.2567
## F-statistic: 4.523 on 10 and 92 DF, p-value: 3.341e-05
# Linear regression of salary on age, the four GMAT columns and work_yrs,
# followed by its coefficient summary.
order2 <- lm(
  formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
    work_yrs,
  data = mbac
)
summary(order2)
##
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc +
## gmat_vpc + work_yrs, data = mbac)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29602 -7617 329 5510 66763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44161.95 46928.83 0.941 0.3490
## age 2412.78 981.86 2.457 0.0158 *
## gmat_tot 12.71 157.04 0.081 0.9357
## gmat_qpc 810.35 468.98 1.728 0.0872 .
## gmat_tpc -1411.33 695.03 -2.031 0.0451 *
## gmat_vpc 501.51 470.60 1.066 0.2892
## work_yrs 466.48 1067.93 0.437 0.6632
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15250 on 96 degrees of freedom
## Multiple R-squared: 0.3143, Adjusted R-squared: 0.2714
## F-statistic: 7.333 on 6 and 96 DF, p-value: 1.806e-06