Loading and summary

# Read the MBA starting-salary data set (path is relative to the working
# directory) and print a per-column summary.
mba.df <- read.csv("MBA Starting Salaries Data.csv")
summary(mba.df)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0

Correlation

# Kendall rank correlation matrix over every column of mba.df, using only
# rows that have no missing values.
# NOTE(review): salary and satis contain values such as 998/999 (see the
# summary output) that look like placeholder codes rather than measurements;
# confirm whether those rows should be excluded before correlating.
cor(mba.df, method = "kendall", use = "complete.obs")
##                   age          sex     gmat_tot    gmat_qpc     gmat_vpc
## age       1.000000000 -0.064918735 -0.089348575 -0.14294895  0.004312123
## sex      -0.064918735  1.000000000 -0.031512872 -0.11604667  0.063244361
## gmat_tot -0.089348575 -0.031512872  1.000000000  0.57049831  0.602830883
## gmat_qpc -0.142948954 -0.116046670  0.570498312  1.00000000  0.135761590
## gmat_vpc  0.004312123  0.063244361  0.602830883  0.13576159  1.000000000
## gmat_tpc -0.092059593 -0.029202995  0.976022086  0.56617453  0.594830301
## s_avg     0.074567920  0.104994627  0.075723411 -0.03360934  0.146074991
## f_avg     0.023463495  0.106433803  0.097979623  0.01921321  0.118891700
## quarter   0.004556967 -0.122029469 -0.065620299  0.04084046 -0.136979833
## work_yrs  0.655013245  0.003886011 -0.146648764 -0.19009952 -0.047736629
## frstlang  0.082749814  0.001536205 -0.104902855  0.12551916 -0.253495516
## salary   -0.066964226 -0.006991235  0.001523055 -0.01833562  0.006500531
## satis    -0.121775381 -0.030718623  0.079740627 -0.02494718  0.108479428
##              gmat_tpc       s_avg         f_avg      quarter     work_yrs
## age      -0.092059593  0.07456792  0.0234634952  0.004556967  0.655013245
## sex      -0.029202995  0.10499463  0.1064338033 -0.122029469  0.003886011
## gmat_tot  0.976022086  0.07572341  0.0979796225 -0.065620299 -0.146648764
## gmat_qpc  0.566174526 -0.03360934  0.0192132078  0.040840458 -0.190099517
## gmat_vpc  0.594830301  0.14607499  0.1188916999 -0.136979833 -0.047736629
## gmat_tpc  1.000000000  0.06976878  0.0920819234 -0.057151609 -0.144030804
## s_avg     0.069768777  1.00000000  0.5906570112 -0.719221088  0.062138164
## f_avg     0.092081923  0.59065701  1.0000000000 -0.479986073 -0.003777988
## quarter  -0.057151609 -0.71922109 -0.4799860731  1.000000000 -0.022169086
## work_yrs -0.144030804  0.06213816 -0.0037779876 -0.022169086  1.000000000
## frstlang -0.100679762 -0.12138557 -0.0700173031  0.091239794 -0.001161440
## salary    0.001900985  0.05469155  0.0236987836 -0.102670440 -0.009141085
## satis     0.080815648 -0.00332823 -0.0009964743 -0.006940549 -0.050227342
##              frstlang       salary         satis
## age       0.082749814 -0.066964226 -0.1217753809
## sex       0.001536205 -0.006991235 -0.0307186232
## gmat_tot -0.104902855  0.001523055  0.0797406267
## gmat_qpc  0.125519156 -0.018335616 -0.0249471837
## gmat_vpc -0.253495516  0.006500531  0.1084794281
## gmat_tpc -0.100679762  0.001900985  0.0808156484
## s_avg    -0.121385567  0.054691548 -0.0033282301
## f_avg    -0.070017303  0.023698784 -0.0009964743
## quarter   0.091239794 -0.102670440 -0.0069405492
## work_yrs -0.001161440 -0.009141085 -0.0502273419
## frstlang  1.000000000 -0.010042495 -0.0910683922
## salary   -0.010042495  1.000000000 -0.0020398247
## satis    -0.091068392 -0.002039825  1.0000000000
library(corrgram)
# Corrgram of all mba.df columns in their original order: shaded cells below
# the diagonal, pie glyphs above it, and min/max values on the diagonal.
# The title names the MBA data being plotted.
corrgram(mba.df,
         order = FALSE,
         lower.panel = panel.shade,
         upper.panel = panel.pie,
         diag.panel = panel.minmax,
         text.panel = panel.txt,
         main = "Corrgram of MBA starting salary variables")

Scatter Plot

library(car)
# Scatterplot matrix for a selected subset of the mba.df columns.
scatterplotMatrix(
  ~ age + sex + gmat_tot + s_avg + f_avg + work_yrs + salary + satis,
  data = mba.df
)

# Rows whose salary is above 1000.
placed.df <- subset(mba.df, salary > 1000)

# Mean salary, work experience and age for each value of sex.
# NOTE(review): these means are computed on all of mba.df, whose salary column
# also holds 0 and values such as 998/999 -- confirm that this is intended
# rather than using placed.df.
aggregate(cbind(salary, work_yrs, age) ~ sex, data = mba.df, FUN = mean)
##   sex   salary work_yrs      age
## 1   1 37013.62 3.893204 27.41748
## 2   2 45121.07 3.808824 27.17647
# Box plot of salary for each value of sex. The formula salary ~ sex places
# sex on the x axis and salary on the y axis, so the axis labels follow that
# orientation.
boxplot(salary ~ sex, data = mba.df,
        main = "Effect of Gender on Salary",
        xlab = "Gender",
        ylab = "Starting Salary")

# Mean salary and mean years of work experience for each age in mba.df.
aggregate(cbind(salary, work_yrs) ~ age, data = mba.df, FUN = mean)
##    age    salary  work_yrs
## 1   22  42500.00  1.000000
## 2   23  57282.00  1.750000
## 3   24  49342.24  1.727273
## 4   25  43395.55  2.264151
## 5   26  35982.07  2.875000
## 6   27  31499.37  3.130435
## 7   28  39809.00  4.666667
## 8   29  28067.95  4.500000
## 9   30  55291.25  5.583333
## 10  31  40599.40  5.800000
## 11  32  13662.25  5.625000
## 12  33 118000.00 10.000000
## 13  34  26250.00 11.500000
## 14  35      0.00  9.333333
## 15  36      0.00 12.500000
## 16  37      0.00  9.000000
## 17  39  56000.00 10.500000
## 18  40 183000.00 15.000000
## 19  42      0.00 13.000000
## 20  43      0.00 19.000000
## 21  48      0.00 22.000000
# Mean salary and mean years of work experience for each satis value in mba.df.
aggregate(cbind(salary, work_yrs) ~ satis, data = mba.df, FUN = mean)
##   satis    salary work_yrs
## 1     1   999.000 3.000000
## 2     2   999.000 2.000000
## 3     3 19799.200 4.200000
## 4     4  6293.412 2.941176
## 5     5 40476.311 4.243243
## 6     6 54383.536 4.185567
## 7     7 65718.152 3.727273
## 8   998   998.000 3.086957
# Box plot of salary for each number of years of work experience.
boxplot(salary ~ work_yrs, data = mba.df,
        main = "Effect of Work Experience on Salary",
        xlab = "Work Experience",
        ylab = "Starting Salary")

library(lattice)
# Lattice histogram of the salary column over all rows of mba.df.
histogram(~ salary,
          data = mba.df,
          main = "Distribution of MBA's Starting Salary",
          xlab = "Starting Salary")

# Split mba.df by the value of salary: above 1000, exactly 0, exactly 999.
placed.df <- subset(mba.df, salary > 1000)
notPlaced.df <- subset(mba.df, salary == 0)
notDisclosedSalary.df <- subset(mba.df, salary == 999)

# Stack the three groups; rows with any other salary value (e.g. 998) are
# left out of clean.df.
clean.df <- rbind(placed.df, notDisclosedSalary.df, notPlaced.df)

# GotPlaced is a factor with levels "FALSE"/"TRUE" marking salary > 1000.
# NOTE(review): rows with salary 999 end up as GotPlaced == FALSE; if 999 only
# means the salary was not disclosed, those students may in fact have been
# placed -- confirm against the data dictionary.
clean.df$GotPlaced <- factor(clean.df$salary > 1000)

Chi-Square Test

# Overall counts of GotPlaced (TRUE = salary above 1000) in clean.df.
t1 <- table(clean.df$GotPlaced == "TRUE")
t1
## 
## FALSE  TRUE 
##   125   103

# GotPlaced cross-tabulated against sex, then as column proportions.
t2 <- xtabs(~ GotPlaced + sex, data = clean.df)
t2
##          sex
## GotPlaced  1  2
##     FALSE 97 28
##     TRUE  72 31
prop.table(t2, margin = 2)
##          sex
## GotPlaced         1         2
##     FALSE 0.5739645 0.4745763
##     TRUE  0.4260355 0.5254237

# GotPlaced cross-tabulated against frstlang, then as column proportions.
t3 <- xtabs(~ GotPlaced + frstlang, data = clean.df)
t3
##          frstlang
## GotPlaced   1   2
##     FALSE 108  17
##     TRUE   96   7
prop.table(t3, margin = 2)
##          frstlang
## GotPlaced         1         2
##     FALSE 0.5294118 0.7083333
##     TRUE  0.4705882 0.2916667

# Chi-squared tests of independence on the two 2x2 tables.
chisq.test(t2)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  t2
## X-squared = 1.366, df = 1, p-value = 0.2425
chisq.test(t3)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  t3
## X-squared = 2.1002, df = 1, p-value = 0.1473
# Recode the 1/2 indicator columns of mba.df as labelled factors. Levels are
# listed in alphabetical label order (Female, Male / English, Other).
mba.df$sex <- factor(mba.df$sex,
                     levels = c(2, 1),
                     labels = c("Female", "Male"))
mba.df$frstlang <- factor(mba.df$frstlang,
                          levels = c(1, 2),
                          labels = c("English", "Other"))

# Linear model of salary, fitted on placed.df. placed.df was subset from
# mba.df before the recoding above, so the model still sees sex and frstlang
# as numeric 1/2 codes (they appear as single numeric terms in the
# coefficient table).
m <- salary ~ work_yrs + s_avg + f_avg + gmat_qpc + gmat_vpc +
  sex + frstlang + satis
fit <- lm(formula = m, data = placed.df)
summary(fit)
## 
## Call:
## lm(formula = m, data = placed.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29800  -7822  -1742   4869  82341 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 86719.94   23350.43   3.714 0.000346 ***
## work_yrs     2331.12     585.99   3.978 0.000137 ***
## s_avg        4659.05    5015.66   0.929 0.355320    
## f_avg       -1698.83    3834.70  -0.443 0.658773    
## gmat_qpc       98.72     121.85   0.810 0.419884    
## gmat_vpc      -95.80     102.99  -0.930 0.354699    
## sex         -5289.24    3545.91  -1.492 0.139140    
## frstlang    13994.76    6641.66   2.107 0.037770 *  
## satis       -1671.20    2070.62  -0.807 0.421643    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15740 on 94 degrees of freedom
## Multiple R-squared:  0.285,  Adjusted R-squared:  0.2241 
## F-statistic: 4.683 on 8 and 94 DF,  p-value: 7.574e-05
     # Rows of mba.df whose salary is exactly 0. Testing salary == 0 already
     # rules out the 998 and 999 values, so no further conditions (and no
     # string comparisons against the numeric column) are needed.
     nojob.df <- mba.df[which(mba.df$salary == 0), ]
     head(nojob.df)
##   age    sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 1  23 Female      620       77       87       87   3.4  3.00       1
## 2  24   Male      610       90       71       87   3.5  4.00       1
## 3  24   Male      670       99       78       95   3.3  3.25       1
## 4  24   Male      570       56       81       75   3.3  2.67       1
## 6  24   Male      640       82       89       91   3.9  3.75       1
## 7  25   Male      610       89       74       87   3.4  3.50       1
##   work_yrs frstlang salary satis
## 1        2  English      0     7
## 2        2  English      0     6
## 3        2  English      0     6
## 4        1  English      0     7
## 6        2  English      0     6
## 7        2  English      0     5
   # Histogram of the gmat_tot column for the rows in nojob.df.
   hist(nojob.df$gmat_tot,
        breaks = 10,
        col = "blue",
        main = "GMAT performance of students with no job",
        xlab = "GMAT score")

   # Chi-squared test of independence between work_yrs and satis in nojob.df.
   chisq.test(nojob.df$work_yrs, nojob.df$satis)
## Warning in chisq.test(nojob.df$work_yrs, nojob.df$satis): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob.df$work_yrs and nojob.df$satis
## X-squared = 44.974, df = 48, p-value = 0.5976

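With a p-value of 0.5976 we fail to reject the null hypothesis of independence: among unplaced students, there is no evidence that satisfaction with the MBA program is associated with years of work experience. This does not prove the null hypothesis, and the test says nothing about whether these students are satisfied. The warning also indicates that many expected cell counts are small, so the chi-squared approximation may be unreliable.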