# Load the MBA starting-salary survey data from the working directory.
Salary.df <- read.csv("MBA Starting Salaries Data.csv")
library(psych)
# View() opens a spreadsheet viewer, so only call it in an interactive session.
if (interactive()) {
  View(Salary.df)
}
# Per-column descriptive statistics. The salary and satis columns still hold
# the codes 998/999 at this point, so their mean/sd are not meaningful.
describe(Salary.df)
## vars n mean sd median trimmed mad min max
## age 1 274 27.36 3.71 27 26.76 2.97 22 48
## sex 2 274 1.25 0.43 1 1.19 0.00 1 2
## gmat_tot 3 274 619.45 57.54 620 618.86 59.30 450 790
## gmat_qpc 4 274 80.64 14.87 83 82.31 14.83 28 99
## gmat_vpc 5 274 78.32 16.86 81 80.33 14.83 16 99
## gmat_tpc 6 274 84.20 14.02 87 86.12 11.86 0 99
## s_avg 7 274 3.03 0.38 3 3.03 0.44 2 4
## f_avg 8 274 3.06 0.53 3 3.09 0.37 0 4
## quarter 9 274 2.48 1.11 2 2.47 1.48 1 4
## work_yrs 10 274 3.87 3.23 3 3.29 1.48 0 22
## frstlang 11 274 1.12 0.32 1 1.02 0.00 1 2
## salary 12 274 39025.69 50951.56 999 33607.86 1481.12 0 220000
## satis 13 274 172.18 371.61 6 91.50 1.48 1 998
## range skew kurtosis se
## age 26 2.16 6.45 0.22
## sex 1 1.16 -0.66 0.03
## gmat_tot 340 -0.01 0.06 3.48
## gmat_qpc 71 -0.92 0.30 0.90
## gmat_vpc 83 -1.04 0.74 1.02
## gmat_tpc 99 -2.28 9.02 0.85
## s_avg 2 -0.06 -0.38 0.02
## f_avg 4 -2.08 10.85 0.03
## quarter 3 0.02 -1.35 0.07
## work_yrs 22 2.78 9.80 0.20
## frstlang 1 2.37 3.65 0.02
## salary 220000 0.70 -1.05 3078.10
## satis 997 1.77 1.13 22.45
# Show the column types and first values of the raw data frame.
str(Salary.df)
## 'data.frame': 274 obs. of 13 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : int 2 1 1 1 2 1 1 2 1 1 ...
## $ gmat_tot: int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc: int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc: int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc: int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang: int 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
# Quartiles and mean of every column. The salary and satis columns still
# contain the codes 998/999 here (see their Median/Max in the output), so
# their means are not meaningful as averages.
summary(Salary.df)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :16.00 Min. : 0.0 Min. :2.000 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.0 1st Qu.:2.708 1st Qu.:2.750
## Median :81.00 Median :87.0 Median :3.000 Median :3.000
## Mean :78.32 Mean :84.2 Mean :3.025 Mean :3.062
## 3rd Qu.:91.00 3rd Qu.:94.0 3rd Qu.:3.300 3rd Qu.:3.250
## Max. :99.00 Max. :99.0 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 0
## 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 0
## Median :2.000 Median : 3.000 Median :1.000 Median : 999
## Mean :2.478 Mean : 3.872 Mean :1.117 Mean : 39026
## 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.: 97000
## Max. :4.000 Max. :22.000 Max. :2.000 Max. :220000
## satis
## Min. : 1.0
## 1st Qu.: 5.0
## Median : 6.0
## Mean :172.2
## 3rd Qu.: 7.0
## Max. :998.0
library(lattice)
# Box-and-whisker plot of each column, one lattice plot per call.
# NOTE(review): frstlang only takes the values 1 and 2, and salary/satis
# still include the 0/998/999 codes at this point - confirm that box plots
# of those three columns are really wanted.
bwplot(Salary.df$age)
bwplot(Salary.df$s_avg)
bwplot(Salary.df$f_avg)
bwplot(Salary.df$work_yrs)
bwplot(Salary.df$gmat_tot)
bwplot(Salary.df$gmat_qpc)
bwplot(Salary.df$gmat_vpc)
bwplot(Salary.df$gmat_tpc)
bwplot(Salary.df$frstlang)
bwplot(Salary.df$salary)
bwplot(Salary.df$satis)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Request a 2 x 2 layout for the plots that follow.
par(mfrow = c(2, 2))
# GMAT total percentile (y) against each subject percentile (x).
# The original single call passed gmat_vpc as a second positional argument;
# scatterplot() takes that slot as `subset` (row selection), not as a second
# predictor, so the wrong rows were plotted. Its axis labels were also
# swapped relative to the formula. One plot per subject column fixes both.
scatterplot(gmat_tpc ~ gmat_qpc, data = Salary.df,
            spread = TRUE,
            main = "GMAT total percentile vs GMAT Subject (gmat_qpc)",
            xlab = "Subject percentile (gmat_qpc)",
            ylab = "GMAT total percentile")
scatterplot(gmat_tpc ~ gmat_vpc, data = Salary.df,
            spread = TRUE,
            main = "GMAT total percentile vs GMAT Subject (gmat_vpc)",
            xlab = "Subject percentile (gmat_vpc)",
            ylab = "GMAT total percentile")
# Quartile ranking (y) against starting salary (x).
scatterplot(
  quarter ~ salary,
  data = Salary.df,
  xlab = "Starting salaries",
  ylab = "Quartile ranking",
  main = "Starting salaries vs Quartile ranking",
  spread = TRUE
)
# GMAT percentile (y) against starting salary (x).
scatterplot(
  gmat_tpc ~ salary,
  data = Salary.df,
  xlab = "Starting salaries",
  ylab = "GMAT percentile",
  main = "GMAT percentile vs starting salaries",
  spread = TRUE
)
# Pairwise scatterplots of GMAT total, the two averages and salary.
scatterplotMatrix(
  Salary.df[, c("gmat_tot", "s_avg", "f_avg", "salary")],
  main = "MBA Starting Salary",
  smoother.args = list(lty = 2),
  spread = FALSE
)
library(corrgram)
# Correlogram of every column: shaded cells below the diagonal, pies above.
corrgram(
  Salary.df,
  order = TRUE,
  main = "MBA starting salaries analysis",
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)
# Covariance matrix of every column (all 274 rows, codes included).
cov(x = Salary.df)
## age sex gmat_tot gmat_qpc
## age 1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex -4.513248e-02 1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00 3.310688e+03 6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00 6.200233e+02 2.210731e+02
## gmat_vpc -2.763643e+00 5.463758e-01 7.260006e+02 3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02 6.839911e+02 1.357997e+02
## s_avg 2.116874e-01 2.096227e-02 2.480257e+00 -1.691233e-01
## f_avg -3.399348e-02 2.082698e-02 3.154688e+00 5.753854e-01
## quarter -2.045935e-01 -6.414267e-02 -5.891153e+00 6.001979e-01
## work_yrs 1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang 6.796610e-02 2.138980e-04 -2.499933e+00 6.646346e-01
## salary -1.183042e+04 1.518264e+03 -1.611600e+05 -3.335823e+04
## satis -1.763499e+02 -8.780808e+00 1.765263e+03 3.348371e+02
## gmat_vpc gmat_tpc s_avg f_avg
## age -2.7636427 -8.8399775 0.21168739 -0.03399348
## sex 0.5463758 -0.0490896 0.02096227 0.02082698
## gmat_tot 726.0006417 683.9910698 2.48025721 3.15468838
## gmat_qpc 38.1482581 135.7996845 -0.16912329 0.57538542
## gmat_vpc 284.2481217 157.4932488 1.31357023 0.67207000
## gmat_tpc 157.4932488 196.6057057 0.62710008 0.58698618
## s_avg 1.3135702 0.6271001 0.14521760 0.11016898
## f_avg 0.6720700 0.5869862 0.11016898 0.27567237
## quarter -3.2676666 -1.2923719 -0.32237213 -0.26080880
## work_yrs -3.6181653 -7.8575172 0.15926392 -0.06628700
## frstlang -2.1145691 -0.4663244 -0.01671372 -0.00626026
## salary -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis 392.3562739 484.2466779 -4.62884495 2.12532927
## quarter work_yrs frstlang salary
## age -2.045935e-01 10.29493864 6.796610e-02 -1.183042e+04
## sex -6.414267e-02 -0.01580172 2.138980e-04 1.518264e+03
## gmat_tot -5.891153e+00 -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc 6.001979e-01 -11.37186171 6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00 -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00 -7.85751718 -4.663244e-01 3.522750e+03
## s_avg -3.223721e-01 0.15926392 -1.671372e-02 2.831601e+03
## f_avg -2.608088e-01 -0.06628700 -6.260260e-03 7.876560e+02
## quarter 1.232119e+00 -0.30866822 3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01 10.44882490 -2.898318e-02 1.486147e+03
## frstlang 3.553381e-02 -0.02898318 1.035266e-01 -1.419586e+03
## salary -9.296214e+03 1486.14704152 -1.419586e+03 2.596062e+09
## satis -5.227133e-03 -131.24080907 9.484532e+00 -6.347115e+06
## satis
## age -1.763499e+02
## sex -8.780808e+00
## gmat_tot 1.765263e+03
## gmat_qpc 3.348371e+02
## gmat_vpc 3.923563e+02
## gmat_tpc 4.842467e+02
## s_avg -4.628845e+00
## f_avg 2.125329e+00
## quarter -5.227133e-03
## work_yrs -1.312408e+02
## frstlang 9.484532e+00
## salary -6.347115e+06
## satis 1.380974e+05
# Keep only the students with an actual salary figure: the values 0, 998 and
# 999 are treated as "no salary figure" codes and excluded, as are missing
# values. The integer column is compared numerically rather than against
# string literals, which relied on implicit number-to-text coercion.
Job.df <- Salary.df[
  !is.na(Salary.df$salary) & !(Salary.df$salary %in% c(0, 998, 999)),
]
head(Job.df)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35 22 2 660 90 92 94 3.5 3.75 1
## 36 27 2 700 94 98 98 3.3 3.25 1
## 37 25 2 680 87 96 96 3.5 2.67 1
## 38 25 2 650 82 91 93 3.4 3.25 1
## 39 27 1 710 96 96 98 3.3 3.50 1
## 40 28 2 620 52 98 87 3.4 3.75 1
## work_yrs frstlang salary satis
## 35 1 1 85000 5
## 36 2 1 85000 6
## 37 2 1 86000 5
## 38 3 1 88000 7
## 39 2 1 92000 6
## 40 5 1 93000 5
# Counts of Job.df students by starting salary (rows) and age (columns).
xtabs(formula = ~ salary + age, data = Job.df)
## age
## salary 22 23 24 25 26 27 28 29 30 31 32 33 34 39 40
## 64000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 85000 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0
## 86000 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 90000 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0
## 92000 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0
## 95000 0 0 1 5 0 0 0 1 0 0 0 0 0 0 0
## 96000 0 0 1 1 2 0 0 0 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
## 98000 0 1 3 2 1 1 1 1 0 0 0 0 0 0 0
## 99000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 100000 0 1 4 1 1 1 0 0 0 1 0 0 0 0 0
## 100400 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 101000 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 101600 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 103000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0
## 105000 0 1 1 2 3 1 0 0 1 1 0 0 1 0 0
## 106000 0 0 0 0 0 0 0 1 2 0 0 0 0 0 0
## 107000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 112000 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0
## 115000 0 0 1 1 0 3 0 0 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 120000 0 0 0 0 0 1 1 0 2 0 0 0 0 0 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 162000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
#"Students aged 24-30 account for most of the placements."
# Counts of Job.df students by starting salary (rows) and sex code (columns).
xtabs(formula = ~ salary + sex, data = Job.df)
## sex
## salary 1 2
## 64000 0 1
## 77000 1 0
## 78256 0 1
## 82000 0 1
## 85000 1 3
## 86000 0 2
## 88000 0 1
## 88500 1 0
## 90000 3 0
## 92000 2 1
## 93000 2 1
## 95000 4 3
## 96000 3 1
## 96500 1 0
## 97000 2 0
## 98000 6 4
## 99000 0 1
## 100000 4 5
## 100400 1 0
## 101000 0 2
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 2 0
## 105000 11 0
## 106000 2 1
## 107000 1 0
## 107300 1 0
## 107500 1 0
## 108000 2 0
## 110000 0 1
## 112000 3 0
## 115000 5 0
## 118000 1 0
## 120000 3 1
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
#"From this table we see that most higher starting salaries have been awarded to men."
# Counts of Job.df students by starting salary (rows) and GMAT total (columns).
xtabs(formula = ~ salary + gmat_tot, data = Job.df)
## gmat_tot
## salary 500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
## 64000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 90000 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0
## 92000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 93000 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0
## 95000 0 0 1 0 0 2 0 0 0 0 2 0 0 0 0 0
## 96000 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0
## 96500 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0
## 98000 0 0 0 0 0 1 3 1 1 0 1 0 0 0 0 0
## 99000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 100000 0 0 0 0 0 2 0 1 0 1 1 0 1 0 2 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 104000 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 2 0 2 3 0 1 0 1 0 0 1 0
## 106000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 108000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 112000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 115000 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 120000 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_tot
## salary 670 680 700 710 720
## 64000 0 0 0 0 0
## 77000 0 0 0 0 0
## 78256 0 0 0 0 0
## 82000 1 0 0 0 0
## 85000 0 0 1 0 1
## 86000 0 1 0 0 0
## 88000 0 0 0 0 0
## 88500 0 0 0 0 0
## 90000 0 0 0 0 0
## 92000 0 0 0 1 0
## 93000 0 0 0 0 0
## 95000 2 0 0 0 0
## 96000 0 0 0 0 0
## 96500 0 0 0 0 0
## 97000 0 0 0 0 0
## 98000 1 1 0 1 0
## 99000 0 0 0 0 0
## 100000 0 0 0 1 0
## 100400 0 0 0 0 0
## 101000 0 0 0 0 0
## 101100 0 0 0 0 0
## 101600 0 0 0 0 0
## 102500 1 0 0 0 0
## 103000 0 0 0 0 0
## 104000 0 0 0 0 0
## 105000 0 1 0 0 0
## 106000 0 2 0 0 0
## 107000 0 0 0 0 0
## 107300 0 0 0 0 0
## 107500 0 0 0 0 0
## 108000 0 0 0 0 0
## 110000 0 0 0 0 0
## 112000 1 1 0 0 0
## 115000 0 0 0 1 0
## 118000 0 0 0 0 0
## 120000 1 0 1 0 0
## 126710 0 0 0 0 0
## 130000 0 0 0 0 0
## 145800 0 0 0 0 0
## 146000 0 0 0 0 0
## 162000 0 0 1 0 0
## 220000 0 0 0 0 0
#"Students with gmat_tot>=560 make up the large majority of the placed students."
# Counts of Job.df students by starting salary (rows) and s_avg (columns).
xtabs(formula = ~ salary + s_avg, data = Job.df)
## s_avg
## salary 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 2.91 3 3.09 3.1 3.2 3.27 3.3 3.4
## 64000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 85000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0
## 86000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 88500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 90000 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0
## 92000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1
## 93000 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1
## 95000 0 0 1 0 0 0 0 1 0 0 0 0 1 1 2 0
## 96000 0 0 0 1 0 0 0 0 0 0 0 0 1 0 2 0
## 96500 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
## 98000 0 1 0 0 0 1 1 4 0 1 0 0 2 0 0 0
## 99000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 100000 0 0 0 0 2 0 1 1 0 1 1 0 0 0 1 2
## 100400 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
## 101100 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 104000 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0
## 105000 1 0 0 0 0 0 0 0 1 2 0 0 1 0 2 0
## 106000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 108000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 112000 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
## 115000 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 120000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## s_avg
## salary 3.45 3.5 3.6 3.7 3.8 4
## 64000 0 1 0 0 0 0
## 77000 0 0 0 0 0 0
## 78256 0 0 0 0 0 0
## 82000 0 0 0 0 0 0
## 85000 0 2 0 0 0 0
## 86000 0 1 0 0 0 0
## 88000 0 0 0 0 0 0
## 88500 0 0 0 0 0 0
## 90000 0 0 0 0 0 0
## 92000 0 0 0 0 0 0
## 93000 0 0 0 0 0 0
## 95000 0 0 1 0 0 0
## 96000 0 0 0 0 0 0
## 96500 0 0 0 0 0 0
## 97000 0 0 0 0 0 0
## 98000 0 0 0 0 0 0
## 99000 0 0 0 0 0 0
## 100000 0 0 0 0 0 0
## 100400 0 0 0 0 0 0
## 101000 0 0 0 0 0 0
## 101100 0 0 0 0 0 0
## 101600 0 0 0 0 0 0
## 102500 0 0 0 0 0 0
## 103000 0 0 0 0 0 0
## 104000 0 0 0 0 0 0
## 105000 1 1 1 0 1 0
## 106000 0 1 0 1 0 0
## 107000 0 0 0 0 0 0
## 107300 0 0 0 0 0 0
## 107500 0 0 0 0 0 0
## 108000 0 1 0 0 0 0
## 110000 0 0 1 0 0 0
## 112000 0 0 1 0 0 0
## 115000 0 0 1 1 0 0
## 118000 0 1 0 0 0 0
## 120000 0 2 0 0 1 0
## 126710 0 0 0 0 0 0
## 130000 0 0 0 0 0 0
## 145800 0 0 0 0 0 0
## 146000 0 0 0 0 0 1
## 162000 0 0 1 0 0 0
## 220000 0 0 0 0 0 0
# Counts of Job.df students by starting salary (rows) and f_avg (columns).
xtabs(formula = ~ salary + f_avg, data = Job.df)
## f_avg
## salary 0 2 2.25 2.5 2.67 2.75 2.83 3 3.25 3.33 3.5 3.6 3.67 3.75 4
## 64000 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 78256 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 85000 0 1 0 0 0 0 0 0 1 0 0 1 0 1 0
## 86000 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0
## 88000 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 88500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 90000 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0
## 92000 0 0 0 0 0 0 0 0 1 0 2 0 0 0 0
## 93000 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0
## 95000 0 0 0 0 0 1 0 1 2 0 2 0 1 0 0
## 96000 0 0 0 1 0 0 0 0 2 0 1 0 0 0 0
## 96500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0
## 98000 0 0 0 1 0 2 0 2 5 0 0 0 0 0 0
## 99000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 100000 0 0 0 0 0 1 0 5 1 0 1 0 1 0 0
## 100400 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0
## 101100 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 101600 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 104000 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
## 105000 0 1 0 0 0 0 1 3 2 0 4 0 0 0 0
## 106000 0 0 0 0 0 0 0 2 0 0 0 1 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107500 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 108000 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 112000 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0
## 115000 0 0 0 1 0 0 0 1 1 0 1 0 0 0 1
## 118000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 120000 0 0 0 0 0 0 0 1 2 0 0 0 0 0 1
## 126710 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 146000 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 220000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
# Counts of Job.df students by starting salary (rows) and first-language
# code (columns).
xtabs(formula = ~ salary + frstlang, data = Job.df)
## frstlang
## salary 1 2
## 64000 1 0
## 77000 1 0
## 78256 1 0
## 82000 1 0
## 85000 4 0
## 86000 2 0
## 88000 1 0
## 88500 1 0
## 90000 3 0
## 92000 3 0
## 93000 3 0
## 95000 7 0
## 96000 4 0
## 96500 1 0
## 97000 2 0
## 98000 8 2
## 99000 0 1
## 100000 9 0
## 100400 1 0
## 101000 2 0
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 1 1
## 105000 11 0
## 106000 3 0
## 107000 1 0
## 107300 0 1
## 107500 1 0
## 108000 2 0
## 110000 1 0
## 112000 3 0
## 115000 5 0
## 118000 0 1
## 120000 4 0
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Counts of Job.df students by starting salary (rows) and work_yrs (columns).
xtabs(formula = ~ salary + work_yrs, data = Job.df)
## work_yrs
## salary 0 1 2 3 4 5 6 7 8 10 15 16
## 64000 0 0 1 0 0 0 0 0 0 0 0 0
## 77000 0 0 1 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0
## 82000 0 1 0 0 0 0 0 0 0 0 0 0
## 85000 0 1 2 1 0 0 0 0 0 0 0 0
## 86000 0 0 1 1 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0
## 88500 0 0 0 1 0 0 0 0 0 0 0 0
## 90000 0 0 2 0 0 1 0 0 0 0 0 0
## 92000 0 0 3 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 0 1 1 0 0 1 0 0 0
## 95000 1 1 2 2 0 1 0 0 0 0 0 0
## 96000 0 1 2 0 1 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 1 1 0 0 0 0 0 0 0
## 98000 0 0 7 1 1 0 0 1 0 0 0 0
## 99000 0 0 0 0 0 1 0 0 0 0 0 0
## 100000 0 0 6 1 1 0 1 0 0 0 0 0
## 100400 0 0 0 1 0 0 0 0 0 0 0 0
## 101000 0 0 2 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 1 0 0 0
## 101600 0 0 0 1 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 1 0 0 0 0 0
## 103000 0 0 0 1 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 2 0 0 0 0 0 0 0
## 105000 0 0 4 4 0 1 1 0 0 0 0 1
## 106000 0 0 0 0 0 0 2 0 1 0 0 0
## 107000 0 0 1 0 0 0 0 0 0 0 0 0
## 107300 0 0 1 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 1 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 1 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0
## 112000 0 0 1 0 0 0 1 0 0 0 0 1
## 115000 0 2 0 1 2 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 1 0 0
## 120000 0 0 0 1 0 2 0 0 1 0 0 0
## 126710 0 0 0 1 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 1 0
## 162000 0 1 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 1 0
# Counts of Job.df students by starting salary (rows) and satis (columns).
xtabs(formula = ~ salary + satis, data = Job.df)
## satis
## salary 3 4 5 6 7
## 64000 0 0 0 0 1
## 77000 0 0 0 1 0
## 78256 0 0 1 0 0
## 82000 0 0 0 0 1
## 85000 0 0 1 3 0
## 86000 0 0 2 0 0
## 88000 0 0 0 0 1
## 88500 0 0 0 1 0
## 90000 0 0 2 0 1
## 92000 0 0 1 1 1
## 93000 0 0 1 2 0
## 95000 1 1 1 2 2
## 96000 0 0 1 1 2
## 96500 0 0 0 1 0
## 97000 0 0 0 1 1
## 98000 0 0 2 5 3
## 99000 0 0 0 1 0
## 100000 0 0 1 6 2
## 100400 0 0 0 0 1
## 101000 0 0 1 1 0
## 101100 0 0 0 1 0
## 101600 0 0 0 1 0
## 102500 0 0 1 0 0
## 103000 0 0 0 1 0
## 104000 0 0 1 1 0
## 105000 0 0 4 6 1
## 106000 0 0 0 2 1
## 107000 0 0 1 0 0
## 107300 0 0 0 0 1
## 107500 0 0 1 0 0
## 108000 0 0 0 2 0
## 110000 0 0 1 0 0
## 112000 0 0 0 2 1
## 115000 0 0 3 2 0
## 118000 0 0 0 0 1
## 120000 0 0 2 2 0
## 126710 0 0 0 1 0
## 130000 0 0 0 0 1
## 145800 0 0 0 1 0
## 146000 0 0 0 1 0
## 162000 0 0 1 0 0
## 220000 0 0 0 1 0
# Flag the students with an actual salary figure: the codes 0, 998 and 999
# all fall below the 1000 cut-off. The flag is then stored as a factor.
Salary.df$GotPlaced <- Salary.df$salary > 1000
# View() opens a spreadsheet viewer, so only call it in an interactive session.
if (interactive()) {
  View(Salary.df)
}
Salary.df$GotPlaced <- factor(Salary.df$GotPlaced)
str(Salary.df)
## 'data.frame': 274 obs. of 14 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : int 2 1 1 1 2 1 1 2 1 1 ...
## $ gmat_tot : int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc : int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc : int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc : int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs : int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang : int 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
## $ GotPlaced: Factor w/ 2 levels "FALSE","TRUE": 1 1 1 1 1 1 1 1 1 1 ...
# Number of placed and not-placed candidates
placed <- table(Salary.df[["GotPlaced"]] == "TRUE")
print(placed)
##
## FALSE TRUE
## 171 103
# Placed / not placed versus sex code
placed_Sexwise <- xtabs(formula = ~ GotPlaced + sex, data = Salary.df)
print(placed_Sexwise)
## sex
## GotPlaced 1 2
## FALSE 134 37
## TRUE 72 31
# Placed / not placed versus first-language code
placed_Language <- xtabs(formula = ~ GotPlaced + frstlang, data = Salary.df)
print(placed_Language)
## frstlang
## GotPlaced 1 2
## FALSE 146 25
## TRUE 96 7
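H1: The percentage of females placed is higher than the percentage of males placed.
Chi-square test: is the percentage of females who got placed higher than the percentage of males who got placed? (The test checks whether the placement rate differs by sex; it does not test the direction of the difference.)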
# Chi-squared test on the placement-by-sex table.
chisq.test(x = placed_Sexwise)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: placed_Sexwise
## X-squared = 2.033, df = 1, p-value = 0.1539
H2: The percentage of students placed is higher among those whose first language is English than among those whose first language is not English.
# Chi-squared test on the placement-by-first-language table.
chisq.test(x = placed_Language)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: placed_Language
## X-squared = 3.0938, df = 1, p-value = 0.07859
# Tests of association between each attribute and starting salary among the
# students in Job.df.
# The original calls had the form t.test(Job.df$age, Job.df$salary), which
# compares the mean of the attribute with the mean of salary. Those are
# different quantities on different scales, so the tiny p-values only showed
# that, for example, a mean age of 26.8 differs from a mean salary of 103031.
# Numeric attributes (age, gmat_tot, satis): correlation test against salary.
# Two-level attributes (sex, frstlang): Welch t-test of salary between groups.
# The output recorded for the original calls no longer applies and has been
# dropped; re-run to regenerate it.
cor.test(Job.df$age, Job.df$salary)
t.test(salary ~ sex, data = Job.df)
cor.test(Job.df$gmat_tot, Job.df$salary)
cor.test(Job.df$satis, Job.df$salary)
t.test(salary ~ frstlang, data = Job.df)
library(corrplot)
## corrplot 0.84 loaded
# List the available columns before choosing the ones to correlate.
names(Job.df)
## [1] "age" "sex" "gmat_tot" "gmat_qpc" "gmat_vpc" "gmat_tpc"
## [7] "s_avg" "f_avg" "quarter" "work_yrs" "frstlang" "salary"
## [13] "satis"
# Columns to correlate (sex, frstlang and salary are left out).
dataColumns <- Job.df[, c(
  "age", "work_yrs",
  "gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc",
  "s_avg", "f_avg", "quarter", "satis"
)]
# Pairwise correlations: drawn as a circle plot, then printed to 2 decimals.
N <- cor(dataColumns)
corrplot(N, method = "circle")
res <- N
round(res, 2)
## age work_yrs gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg
## age 1.00 0.88 -0.08 -0.17 0.02 -0.10 0.16 -0.22
## work_yrs 0.88 1.00 -0.12 -0.18 -0.03 -0.13 0.16 -0.22
## gmat_tot -0.08 -0.12 1.00 0.67 0.78 0.97 0.17 0.12
## gmat_qpc -0.17 -0.18 0.67 1.00 0.09 0.66 0.02 0.10
## gmat_vpc 0.02 -0.03 0.78 0.09 1.00 0.78 0.16 0.02
## gmat_tpc -0.10 -0.13 0.97 0.66 0.78 1.00 0.14 0.07
## s_avg 0.16 0.16 0.17 0.02 0.16 0.14 1.00 0.45
## f_avg -0.22 -0.22 0.12 0.10 0.02 0.07 0.45 1.00
## quarter -0.13 -0.13 -0.11 0.01 -0.13 -0.10 -0.84 -0.43
## satis 0.11 0.06 0.06 0.00 0.15 0.12 -0.14 -0.12
## quarter satis
## age -0.13 0.11
## work_yrs -0.13 0.06
## gmat_tot -0.11 0.06
## gmat_qpc 0.01 0.00
## gmat_vpc -0.13 0.15
## gmat_tpc -0.10 0.12
## s_avg -0.84 -0.14
## f_avg -0.43 -0.12
## quarter 1.00 0.23
## satis 0.23 1.00
# Linear model of starting salary on the eight predictors listed below.
# Fit on Job.df (rows with an actual salary figure). The original fit used
# Salary.df, whose salary column includes the 0/998/999 codes and whose satis
# column includes 998, so the estimates were driven by those codes.
# The summary recorded for the original fit no longer applies and has been
# dropped; re-run to regenerate it.
REGModel1 <- salary ~
  work_yrs + s_avg + f_avg + gmat_qpc + gmat_vpc + sex + frstlang + satis
fit <- lm(REGModel1, data = Job.df)
summary(fit)
# Reduced linear model of starting salary on work_yrs, sex, frstlang and satis.
# Fit on Job.df (rows with an actual salary figure). The original fit used
# Salary.df, whose salary column includes the 0/998/999 codes and whose satis
# column includes 998, so the estimates were driven by those codes.
# The summary recorded for the original fit no longer applies and has been
# dropped; re-run to regenerate it.
REGModel2 <- salary ~
  work_yrs +
  sex +
  frstlang +
  satis
# Reuses the name `fit`, replacing any earlier model stored under it.
fit <- lm(REGModel2, data = Job.df)
summary(fit)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Re-read the raw survey file (Salary.df has gained a GotPlaced column by now).
Total.df <- read.csv("MBA Starting Salaries Data.csv")
# Students without a salary figure: rows whose salary is one of the codes
# 0, 998 or 999 (or missing), i.e. the rows that were left out of Job.df.
# Selecting by the codes replaces setdiff(Total.df, Job.df), which only works
# row-wise on data frames through dplyr's masking of base::setdiff and
# silently collapses duplicate rows.
NoJob.df <- Total.df[
  is.na(Total.df$salary) | Total.df$salary %in% c(0, 998, 999),
]
head(NoJob.df)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter work_yrs
## 1 23 2 620 77 87 87 3.4 3.00 1 2
## 2 24 1 610 90 71 87 3.5 4.00 1 2
## 3 24 1 670 99 78 95 3.3 3.25 1 2
## 4 24 1 570 56 81 75 3.3 2.67 1 1
## 5 24 2 710 93 98 98 3.6 3.75 1 2
## 6 24 1 640 82 89 91 3.9 3.75 1 2
## frstlang salary satis
## 1 1 0 7
## 2 1 0 6
## 3 1 0 6
## 4 1 0 7
## 5 1 999 5
## 6 1 0 6
# Counts of NoJob.df students by salary code (rows) and age (columns).
xtabs(formula = ~ salary + age, data = NoJob.df)
## age
## salary 22 23 24 25 26 27 28 29 30 31 32 34 35 36 37 39 42 43 48
## 0 1 3 13 9 10 14 6 11 2 2 5 3 3 2 1 1 1 2 1
## 998 0 0 2 15 11 11 4 0 1 2 0 0 0 0 0 0 0 0 0
## 999 0 0 2 6 5 7 3 5 3 2 2 0 0 0 0 0 0 0 0
# Counts of NoJob.df students by salary code (rows) and sex code (columns).
xtabs(formula = ~ salary + sex, data = NoJob.df)
## sex
## salary 1 2
## 0 67 23
## 998 37 9
## 999 30 5
# Counts of NoJob.df students by salary code (rows) and GMAT total (columns).
xtabs(formula = ~ salary + gmat_tot, data = NoJob.df)
## gmat_tot
## salary 450 460 480 500 510 530 540 550 560 570 580 590 600 610 620 630 640
## 0 1 0 1 0 2 3 3 4 8 7 4 3 3 9 4 5 6
## 998 1 1 0 0 0 0 0 0 3 0 2 2 5 4 0 7 3
## 999 0 0 0 1 0 0 0 1 2 4 1 2 3 0 4 2 2
## gmat_tot
## salary 650 660 670 680 690 700 710 720 730 740 750 760 790
## 0 5 3 4 3 0 2 4 2 1 1 1 1 0
## 998 2 3 5 2 2 0 1 1 0 2 0 0 0
## 999 2 3 1 1 2 0 1 0 1 1 0 0 1
# Counts of NoJob.df students by salary code (rows) and s_avg (columns).
xtabs(formula = ~ salary + s_avg, data = NoJob.df)
## s_avg
## salary 2 2.1 2.2 2.3 2.4 2.45 2.5 2.6 2.67 2.7 2.73 2.8 2.82 2.9 3 3.08
## 0 1 2 1 2 2 0 0 1 0 8 0 9 1 9 10 1
## 998 0 0 1 0 1 0 2 2 1 9 1 1 0 4 6 0
## 999 0 0 0 1 2 1 5 2 0 6 0 2 0 3 2 0
## s_avg
## salary 3.09 3.1 3.17 3.18 3.2 3.25 3.27 3.3 3.38 3.4 3.45 3.5 3.56 3.6
## 0 2 6 1 0 4 1 2 9 1 7 1 2 0 4
## 998 0 3 0 1 2 0 0 2 0 3 1 3 1 0
## 999 0 4 0 0 1 0 0 3 0 1 0 1 0 1
## s_avg
## salary 3.64 3.8 3.9 4
## 0 1 1 1 0
## 998 0 1 0 1
## 999 0 0 0 0
# Counts of NoJob.df students by salary code (rows) and f_avg (columns).
xtabs(formula = ~ salary + f_avg, data = NoJob.df)
## f_avg
## salary 0 2 2.25 2.33 2.5 2.67 2.75 2.8 3 3.17 3.2 3.25 3.33 3.4 3.5 3.6
## 0 1 3 2 0 8 1 9 0 24 1 1 18 1 1 7 1
## 998 1 0 1 1 4 0 7 0 9 1 0 11 0 0 3 0
## 999 0 0 1 0 4 1 7 2 10 0 1 4 1 1 2 0
## f_avg
## salary 3.67 3.75 3.83 4
## 0 1 6 1 4
## 998 1 2 0 5
## 999 0 1 0 0
# Counts of NoJob.df students by salary code (rows) and first-language
# code (columns).
xtabs(formula = ~ salary + frstlang, data = NoJob.df)
## frstlang
## salary 1 2
## 0 82 8
## 998 38 8
## 999 26 9
# Counts of NoJob.df students by salary code (rows) and work_yrs (columns).
xtabs(formula = ~ salary + work_yrs, data = NoJob.df)
## work_yrs
## salary 0 1 2 3 4 5 6 7 8 9 10 11 12 13 16 18 22
## 0 1 12 22 14 9 12 2 5 2 1 1 2 2 1 1 1 2
## 998 0 2 16 13 12 0 1 1 1 0 0 0 0 0 0 0 0
## 999 1 2 6 8 11 2 2 2 0 1 0 0 0 0 0 0 0
# Counts of NoJob.df students by salary code (rows) and satis (columns).
xtabs(formula = ~ salary + satis, data = NoJob.df)
## satis
## salary 1 2 3 4 5 6 7 998
## 0 0 0 0 4 36 40 10 0
## 998 0 0 0 0 0 0 0 46
## 999 1 1 4 12 9 7 1 0
# Welch two-sample t-test of mean age against mean salary in NoJob.df.
# NOTE(review): salary holds only the codes 0/998/999 in NoJob.df, so this
# compares an age average with an average of codes - confirm the intent.
t.test(x = NoJob.df$age, y = NoJob.df$salary)
##
## Welch Two Sample t-test
##
## data: NoJob.df$age and NoJob.df$salary
## t = -11.644, df = 170.02, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -520.7128 -369.7550
## sample estimates:
## mean of x mean of y
## 27.7076 472.9415
# Welch two-sample t-test of mean sex code against mean salary in NoJob.df.
# NOTE(review): salary holds only the codes 0/998/999 in NoJob.df, so this
# compares two averages of codes - confirm the intent.
t.test(x = NoJob.df$sex, y = NoJob.df$salary)
##
## Welch Two Sample t-test
##
## data: NoJob.df$sex and NoJob.df$salary
## t = -12.337, df = 170, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -547.2018 -396.2485
## sample estimates:
## mean of x mean of y
## 1.216374 472.941520
# Welch two-sample t-test of mean GMAT total against mean salary in NoJob.df.
# NOTE(review): salary holds only the codes 0/998/999 in NoJob.df, so this
# compares a score average with an average of codes - confirm the intent.
t.test(x = NoJob.df$gmat_tot, y = NoJob.df$salary)
##
## Welch Two Sample t-test
##
## data: NoJob.df$gmat_tot and NoJob.df$salary
## t = 3.857, df = 175.12, p-value = 0.0001611
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 72.55219 224.60570
## sample estimates:
## mean of x mean of y
## 621.5205 472.9415
# Welch two-sample t-test of mean satis against mean salary in NoJob.df.
# NOTE(review): salary holds only the codes 0/998/999 in NoJob.df and satis
# includes the code 998, so both averages are of codes - confirm the intent.
t.test(x = NoJob.df$satis, y = NoJob.df$salary)
##
## Welch Two Sample t-test
##
## data: NoJob.df$satis and NoJob.df$salary
## t = -3.9327, df = 334.87, p-value = 0.0001021
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -300.9328 -100.2602
## sample estimates:
## mean of x mean of y
## 272.3450 472.9415
# Welch two-sample t-test of mean first-language code against mean salary in
# NoJob.df.
# NOTE(review): salary holds only the codes 0/998/999 in NoJob.df, so this
# compares two averages of codes - confirm the intent.
t.test(x = NoJob.df$frstlang, y = NoJob.df$salary)
##
## Welch Two Sample t-test
##
## data: NoJob.df$frstlang and NoJob.df$salary
## t = -12.339, df = 170, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -547.2720 -396.3186
## sample estimates:
## mean of x mean of y
## 1.146199 472.941520
We have analysed almost all aspects related to the starting salaries of MBA graduates, as well as the different factors involved and their correlations.