# Load the raw MBA starting-salary survey data from the working directory.
mba <- read.csv("MBA Starting Salaries Data.csv")

# Keep only rows with a usable salary figure. The values 0, 998 and 999 are
# excluded; presumably they are sentinel codes (e.g. not placed / no answer)
# rather than real salaries -- TODO confirm against the data dictionary.
# Rows whose salary is NA are dropped as well, matching the original
# `which()`-based filter.
excluded_salaries <- c(0, 998, 999)
has_salary <- !is.na(mba$salary) & !(mba$salary %in% excluded_salaries)
mbac <- mba[has_salary, ]

# View() needs an interactive session with a data viewer; skip it when the
# script is run non-interactively so the analysis does not abort there.
if (interactive()) {
  View(mba)
}

# Per-column summary statistics of the filtered data.
summary(mbac)
##       age             sex           gmat_tot      gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :500   Min.   :39.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580   1st Qu.:72.00  
##  Median :26.00   Median :1.000   Median :620   Median :82.00  
##  Mean   :26.78   Mean   :1.301   Mean   :616   Mean   :79.73  
##  3rd Qu.:28.00   3rd Qu.:2.000   3rd Qu.:655   3rd Qu.:89.00  
##  Max.   :40.00   Max.   :2.000   Max.   :720   Max.   :99.00  
##     gmat_vpc        gmat_tpc         s_avg           f_avg      
##  Min.   :30.00   Min.   :51.00   Min.   :2.200   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.00   1st Qu.:2.850   1st Qu.:2.915  
##  Median :81.00   Median :87.00   Median :3.100   Median :3.250  
##  Mean   :78.56   Mean   :84.52   Mean   :3.092   Mean   :3.091  
##  3rd Qu.:92.00   3rd Qu.:93.50   3rd Qu.:3.400   3rd Qu.:3.415  
##  Max.   :99.00   Max.   :99.00   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs        frstlang         salary      
##  Min.   :1.000   Min.   : 0.00   Min.   :1.000   Min.   : 64000  
##  1st Qu.:1.000   1st Qu.: 2.00   1st Qu.:1.000   1st Qu.: 95000  
##  Median :2.000   Median : 3.00   Median :1.000   Median :100000  
##  Mean   :2.262   Mean   : 3.68   Mean   :1.068   Mean   :103031  
##  3rd Qu.:3.000   3rd Qu.: 4.00   3rd Qu.:1.000   3rd Qu.:106000  
##  Max.   :4.000   Max.   :16.00   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :3.000  
##  1st Qu.:5.000  
##  Median :6.000  
##  Mean   :5.883  
##  3rd Qu.:6.000  
##  Max.   :7.000

Visualizing variables independently

age

# Horizontal boxplot of the age column of the filtered data.
boxplot(
  mbac[["age"]],
  main = "Age",
  xlab = "years",
  horizontal = TRUE
)

GMAT total

# Horizontal boxplot of the gmat_tot column of the filtered data.
boxplot(
  mbac[["gmat_tot"]],
  main = "Gmat total",
  xlab = "total score",
  horizontal = TRUE
)

GMAT total percentile

# Horizontal boxplot of the gmat_tpc column of the filtered data.
boxplot(
  mbac[["gmat_tpc"]],
  main = "Gmat total percentile",
  xlab = "Gmat total percentile",
  horizontal = TRUE
)

spring MBA average

# Horizontal boxplot of the s_avg column of the filtered data.
boxplot(
  mbac[["s_avg"]],
  main = "Spring MBA average",
  xlab = "spring MBA average",
  horizontal = TRUE
)

fall MBA average

# Horizontal boxplot of the f_avg column of the filtered data.
boxplot(
  mbac[["f_avg"]],
  main = "Fall MBA average",
  xlab = "fall MBA average",
  horizontal = TRUE
)

Scatterplot matrix

# Pairwise scatterplot matrix (car package) of the listed columns of the
# filtered data.
library(car)
scatterplotMatrix(
  ~ age + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + work_yrs + salary,
  data = mbac
)

Corrgram plot

# Corrgram of every column in the filtered data: shaded panels below the
# diagonal, pie panels above it, variable names on the diagonal. Variables
# are kept in data-frame order (order = FALSE).
library(corrgram)
corrgram(
  mbac,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of variables in MBA starting salaries "
)

Correlations

# Pearson correlation matrix across all columns of the filtered data
# (the method is cor()'s default, spelled out here for clarity).
cor(x = mbac, method = "pearson")
##                  age         sex    gmat_tot     gmat_qpc    gmat_vpc
## age       1.00000000 -0.14352927 -0.07871678 -0.165039057  0.01799420
## sex      -0.14352927  1.00000000 -0.01955548 -0.147099027  0.05341428
## gmat_tot -0.07871678 -0.01955548  1.00000000  0.666382266  0.78038546
## gmat_qpc -0.16503906 -0.14709903  0.66638227  1.000000000  0.09466541
## gmat_vpc  0.01799420  0.05341428  0.78038546  0.094665411  1.00000000
## gmat_tpc -0.09609156 -0.04686981  0.96680810  0.658650025  0.78443167
## s_avg     0.15654954  0.08079985  0.17198874  0.015471662  0.15865101
## f_avg    -0.21699191  0.16572186  0.12246257  0.098418869  0.02290167
## quarter  -0.12568145 -0.02139041 -0.10578964  0.012648346 -0.12862079
## work_yrs  0.88052470 -0.09233003 -0.12280018 -0.182701263 -0.02812182
## frstlang  0.35026743  0.07512009 -0.13164323  0.014198516 -0.21835333
## salary    0.49964284 -0.16628869 -0.09067141  0.014141299 -0.13743230
## satis     0.10832308 -0.09199534  0.06474206 -0.003984632  0.14863481
##             gmat_tpc       s_avg       f_avg     quarter    work_yrs
## age      -0.09609156  0.15654954 -0.21699191 -0.12568145  0.88052470
## sex      -0.04686981  0.08079985  0.16572186 -0.02139041 -0.09233003
## gmat_tot  0.96680810  0.17198874  0.12246257 -0.10578964 -0.12280018
## gmat_qpc  0.65865003  0.01547166  0.09841887  0.01264835 -0.18270126
## gmat_vpc  0.78443167  0.15865101  0.02290167 -0.12862079 -0.02812182
## gmat_tpc  1.00000000  0.13938500  0.07051391 -0.09955033 -0.13246963
## s_avg     0.13938500  1.00000000  0.44590413 -0.84038355  0.16328236
## f_avg     0.07051391  0.44590413  1.00000000 -0.43144819 -0.21633018
## quarter  -0.09955033 -0.84038355 -0.43144819  1.00000000 -0.12896722
## work_yrs -0.13246963  0.16328236 -0.21633018 -0.12896722  1.00000000
## frstlang -0.16437561 -0.13788905 -0.05061394  0.10955726  0.19627277
## salary   -0.13201783  0.10173175 -0.10603897 -0.12848526  0.45466634
## satis     0.11630842 -0.14356557 -0.11773304  0.22511985  0.06299926
##             frstlang      salary        satis
## age       0.35026743  0.49964284  0.108323083
## sex       0.07512009 -0.16628869 -0.091995338
## gmat_tot -0.13164323 -0.09067141  0.064742057
## gmat_qpc  0.01419852  0.01414130 -0.003984632
## gmat_vpc -0.21835333 -0.13743230  0.148634805
## gmat_tpc -0.16437561 -0.13201783  0.116308417
## s_avg    -0.13788905  0.10173175 -0.143565573
## f_avg    -0.05061394 -0.10603897 -0.117733043
## quarter   0.10955726 -0.12848526  0.225119851
## work_yrs  0.19627277  0.45466634  0.062999256
## frstlang  1.00000000  0.26701953  0.089834769
## salary    0.26701953  1.00000000 -0.040050600
## satis     0.08983477 -0.04005060  1.000000000

t-tests to check whether salary depends on the two-level factors (sex, first language)

# Welch two-sample t-test of salary between the two sex groups.
# The arguments below are t.test()'s defaults, written out explicitly:
# two-sided alternative, unequal variances (Welch), 95% confidence interval.
t.test(
  mbac$salary ~ mbac$sex,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  mbac$salary by mbac$sex
## t = 1.3628, df = 38.115, p-value = 0.1809
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3128.55 16021.72
## sample estimates:
## mean in group 1 mean in group 2 
##       104970.97        98524.39
# Welch two-sample t-test of salary between the two frstlang groups.
# The arguments below are t.test()'s defaults, written out explicitly:
# two-sided alternative, unequal variances (Welch), 95% confidence interval.
t.test(
  mbac$salary ~ mbac$frstlang,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  mbac$salary by mbac$frstlang
## t = -1.1202, df = 6.0863, p-value = 0.3049
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -59933.62  22202.25
## sample estimates:
## mean in group 1 mean in group 2 
##        101748.6        120614.3

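Neither p-value (0.1809 for sex, 0.3049 for first language) is below 0.05, so in both cases we fail to reject the null hypothesis: the data show no statistically significant difference in mean salary between the groups.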

# First linear model: salary regressed on age, the four GMAT measures, the
# spring/fall averages, quarter, years of work experience and satisfaction.
# The formula is kept inline so the Call printed by summary() shows it.
order1 <- lm(
  formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
    s_avg + f_avg + quarter + work_yrs + satis,
  data = mbac
)
summary(order1)
## 
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + 
##     gmat_vpc + s_avg + f_avg + quarter + work_yrs + satis, data = mbac)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26196  -8241   -324   5297  70000 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 69019.43   52376.30   1.318   0.1909  
## age          2379.27    1004.19   2.369   0.0199 *
## gmat_tot       29.52     176.18   0.168   0.8673  
## gmat_qpc      813.29     492.44   1.652   0.1020  
## gmat_tpc    -1479.96     713.20  -2.075   0.0408 *
## gmat_vpc      489.93     495.74   0.988   0.3256  
## s_avg       -3124.32    8046.45  -0.388   0.6987  
## f_avg       -2345.08    3855.93  -0.608   0.5446  
## quarter     -2787.20    2694.67  -1.034   0.3037  
## work_yrs      360.74    1087.30   0.332   0.7408  
## satis        -719.58    2136.17  -0.337   0.7370  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15410 on 92 degrees of freedom
## Multiple R-squared:  0.3296, Adjusted R-squared:  0.2567 
## F-statistic: 4.523 on 10 and 92 DF,  p-value: 3.341e-05
# Reduced linear model: salary regressed on age, the four GMAT measures and
# years of work experience only.
# The formula is kept inline so the Call printed by summary() shows it.
order2 <- lm(
  formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
    work_yrs,
  data = mbac
)
summary(order2)
## 
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_tpc + 
##     gmat_vpc + work_yrs, data = mbac)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29602  -7617    329   5510  66763 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 44161.95   46928.83   0.941   0.3490  
## age          2412.78     981.86   2.457   0.0158 *
## gmat_tot       12.71     157.04   0.081   0.9357  
## gmat_qpc      810.35     468.98   1.728   0.0872 .
## gmat_tpc    -1411.33     695.03  -2.031   0.0451 *
## gmat_vpc      501.51     470.60   1.066   0.2892  
## work_yrs      466.48    1067.93   0.437   0.6632  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15250 on 96 degrees of freedom
## Multiple R-squared:  0.3143, Adjusted R-squared:  0.2714 
## F-statistic: 7.333 on 6 and 96 DF,  p-value: 1.806e-06