This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the raw MBA starting-salary survey data.
mba.df <- read.csv("MBA Starting Salaries Data.csv", sep = ",")

# Keep only rows whose salary is not one of the codes 0, 998 or 999.
# (presumably these are sentinel values rather than real salaries - confirm
# against the data dictionary.) The comparison is done on numbers: comparing
# a numeric column with a string makes R compare the values as text.
job.df <- mba.df[
  which(mba.df$salary != 998 & mba.df$salary != 999 & mba.df$salary != 0),
]

# Five-number summary (plus mean) of every column in the unfiltered data.
summary(mba.df)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :16.00 Min. : 0.0 Min. :2.000 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.0 1st Qu.:2.708 1st Qu.:2.750
## Median :81.00 Median :87.0 Median :3.000 Median :3.000
## Mean :78.32 Mean :84.2 Mean :3.025 Mean :3.062
## 3rd Qu.:91.00 3rd Qu.:94.0 3rd Qu.:3.300 3rd Qu.:3.250
## Max. :99.00 Max. :99.0 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 0
## 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 0
## Median :2.000 Median : 3.000 Median :1.000 Median : 999
## Mean :2.478 Mean : 3.872 Mean :1.117 Mean : 39026
## 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.: 97000
## Max. :4.000 Max. :22.000 Max. :2.000 Max. :220000
## satis
## Min. : 1.0
## 1st Qu.: 5.0
## Median : 6.0
## Mean :172.2
## 3rd Qu.: 7.0
## Max. :998.0
# Univariate overview of the full data set (mba.df), one plot per variable.

# Age of the students.
hist(
  mba.df$age,
  col = "aliceblue",
  xlab = "Age in years",
  main = "Age Distribution"
)

# Recode sex from the numeric codes 1/2 to labelled factor levels, then plot
# the number of students per level.
mba.df$sex <- factor(mba.df$sex, levels = c(1, 2), labels = c("Male", "Female"))
plot(mba.df$sex, col = "antiquewhite", main = "Gender distribution")

# Total GMAT score.
hist(
  mba.df$gmat_tot,
  breaks = 20,
  col = "antiquewhite1",
  xlab = "GMAT total score",
  main = "Distribution of GMAT scores"
)

# Years of work experience.
hist(
  mba.df$work_yrs,
  breaks = 20,
  col = "antiquewhite2",
  xlab = "No. of years of work experience",
  main = "Work Experience"
)

# Recode first language from the numeric codes 1/2 to labelled factor levels,
# then plot the number of students per level.
mba.df$frstlang <- factor(
  mba.df$frstlang,
  levels = c(1, 2),
  labels = c("English", "Others")
)
plot(mba.df$frstlang, col = "antiquewhite3", main = "Language Distribution")
# Satisfaction histogram: keep only ratings on the 1-7 scale (the satis column
# also contains the value 998, see its summary). The threshold is the number 7,
# not the string '7': with a string operand R compares the values as text, so
# a value such as 10 would wrongly pass the filter ("10" <= "7").
newdata <- mba.df[which(mba.df$satis <= 7), ]
hist(
  newdata$satis,
  breaks = 5,
  col = "aquamarine2",
  xlab = "Degree of Satisfaction (1=low,7=high)",
  main = "Satisfaction distribution"
)

# Salary histogram: drop the 998/999 codes, compared numerically.
# NOTE(review): rows with salary == 0 are kept here, unlike in job.df -
# confirm that including them in the salary distribution is intended.
newdata1 <- mba.df[which(mba.df$salary != 998 & mba.df$salary != 999), ]
hist(
  newdata1$salary,
  breaks = 5,
  col = "aquamarine3",
  xlab = "starting salary",
  main = "Salary distribution"
)
# Horizontal boxplots of the GMAT percentiles, the semester averages and the
# quartile ranking, all on the unfiltered data (mba.df).
boxplot(mba.df$gmat_qpc, main = "Quant Percentile Distribution",
        xlab = "GMAT QPC", horizontal = TRUE)
boxplot(mba.df$gmat_vpc, main = "Verbal Percentile Distribution",
        xlab = "GMAT VPC", horizontal = TRUE)
boxplot(mba.df$gmat_tpc, main = "Overall Percentile Distribution",
        xlab = "GMAT TPC", horizontal = TRUE)
boxplot(mba.df$s_avg, main = "Spring MBA Avg Dist.",
        xlab = "s_avg", horizontal = TRUE)
boxplot(mba.df$f_avg, main = "Fall MBA Avg Dist.",
        xlab = "f_avg", horizontal = TRUE)
# The quartile-ranking plot must use the quarter column; it previously drew
# gmat_qpc a second time under this title.
boxplot(mba.df$quarter, main = "Quartile Ranking Dist.",
        xlab = "Quart", horizontal = TRUE)
library(car)

# Scatterplot matrix of salary against the other variables, using job.df.
pairs(
  ~ salary + sex + age + gmat_tpc + frstlang + satis + work_yrs,
  data = job.df,
  main = "Salary versus other variables"
)

# Work experience against overall GMAT percentile for all students, with the
# least-squares regression line drawn in blue.
plot(
  mba.df$gmat_tpc,
  mba.df$work_yrs,
  main = "Total GMAT Percentile vs Work Experience",
  xlab = "GMAT Percentile",
  ylab = "Work Experience"
)
abline(lm(work_yrs ~ gmat_tpc, data = mba.df), col = "blue")

library(corrgram)

# Correlogram of job.df: shaded cells below the diagonal, pies above it,
# variable names on the diagonal.
corrgram(
  job.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "MBA starting salary analysis Correlogram"
)
# Covariance matrix of the numeric columns of job.df. x and y hold the same
# nine columns, so cov(x, y) is the covariance of that set with itself.
x <- job.df[, c(
  "age", "gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc",
  "s_avg", "f_avg", "work_yrs", "salary"
)]
y <- x
cov(x, y)
## age gmat_tot gmat_qpc gmat_vpc
## age 10.7045498 -13.054445 -7.22796497 9.505045e-01
## gmat_tot -13.0544451 2569.293737 452.14258519 6.386360e+02
## gmat_qpc -7.2279650 452.142585 179.18027794 2.045850e+01
## gmat_vpc 0.9505045 638.636018 20.45849990 2.606602e+02
## gmat_tpc -3.4602132 539.362269 97.03607462 1.393882e+02
## s_avg 0.1938587 3.299562 0.07838473 9.694594e-01
## f_avg -0.3462517 3.027432 0.64252142 1.803303e-01
## work_yrs 8.6728536 -18.738816 -7.36245955 -1.366838e+00
## salary 29210.5193223 -82124.485056 3382.43784504 -3.964803e+04
## gmat_tpc s_avg f_avg work_yrs
## age -3.460213e+00 0.19385875 -0.34625167 8.6728536
## gmat_tot 5.393623e+02 3.29956215 3.02743194 -18.7388159
## gmat_qpc 9.703607e+01 0.07838473 0.64252142 -7.3624595
## gmat_vpc 1.393882e+02 0.96945936 0.18033029 -1.3668380
## gmat_tpc 1.211342e+02 0.58062916 0.37850562 -4.3892062
## s_avg 5.806292e-01 0.14325138 0.08231046 0.1860480
## f_avg 3.785056e-01 0.08231046 0.23786375 -0.3176271
## work_yrs -4.389206e+00 0.18604797 -0.31762707 9.0630116
## salary -2.596339e+04 688.02042071 -924.11288026 24458.1995050
## salary
## age 2.921052e+04
## gmat_tot -8.212449e+04
## gmat_qpc 3.382438e+03
## gmat_vpc -3.964803e+04
## gmat_tpc -2.596339e+04
## s_avg 6.880204e+02
## f_avg -9.241129e+02
## work_yrs 2.445820e+04
## salary 3.192940e+08
# Rebuild job.df from mba.df so that it picks up the factor labels that were
# assigned to sex and frstlang after job.df was first created. Rows whose
# salary is one of the codes 0, 998 or 999 are dropped; the comparison is
# numeric, because comparing a numeric column with a string makes R compare
# the values as text.
job.df <- mba.df[
  which(mba.df$salary != 998 & mba.df$salary != 999 & mba.df$salary != 0),
]

# Cross-tabulate starting salary (rows) against age (columns).
xtabs(~ salary + age, data = job.df)
## age
## salary 22 23 24 25 26 27 28 29 30 31 32 33 34 39 40
## 64000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 85000 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0
## 86000 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 90000 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0
## 92000 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0
## 95000 0 0 1 5 0 0 0 1 0 0 0 0 0 0 0
## 96000 0 0 1 1 2 0 0 0 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
## 98000 0 1 3 2 1 1 1 1 0 0 0 0 0 0 0
## 99000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 100000 0 1 4 1 1 1 0 0 0 1 0 0 0 0 0
## 100400 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 101000 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 101600 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 103000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0
## 105000 0 1 1 2 3 1 0 0 1 1 0 0 1 0 0
## 106000 0 0 0 0 0 0 0 1 2 0 0 0 0 0 0
## 107000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 112000 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0
## 115000 0 0 1 1 0 3 0 0 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 120000 0 0 0 0 0 1 1 0 2 0 0 0 0 0 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 162000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
# Analyst's reading of the salary-by-age table above, written as a bare string
# so that it is echoed into the knitted output.
"age group of 24-30 were more placed than the others"
## [1] "age group of 24-30 were more placed than the others"
# Cross-tabulate starting salary (rows) against sex (columns) for job.df.
xtabs(~ salary + sex, data = job.df)
## sex
## salary Male Female
## 64000 0 1
## 77000 1 0
## 78256 0 1
## 82000 0 1
## 85000 1 3
## 86000 0 2
## 88000 0 1
## 88500 1 0
## 90000 3 0
## 92000 2 1
## 93000 2 1
## 95000 4 3
## 96000 3 1
## 96500 1 0
## 97000 2 0
## 98000 6 4
## 99000 0 1
## 100000 4 5
## 100400 1 0
## 101000 0 2
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 2 0
## 105000 11 0
## 106000 2 1
## 107000 1 0
## 107300 1 0
## 107500 1 0
## 108000 2 0
## 110000 0 1
## 112000 3 0
## 115000 5 0
## 118000 1 0
## 120000 3 1
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Analyst's reading of the salary-by-sex table above, written as a bare string
# so that it is echoed into the knitted output.
# NOTE(review): the single highest salary in that table (220000) is in the
# Female column - qualify the statement if needed.
"From this table we see that most higher starting salaries have been awarded to men.
"
## [1] "From this table we see that most higher starting salaries have been awarded to men.\n"
# Cross-tabulate starting salary (rows) against total GMAT score (columns)
# for job.df.
xtabs(~ salary + gmat_tot, data = job.df)
## gmat_tot
## salary 500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
## 64000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 90000 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0
## 92000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 93000 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0
## 95000 0 0 1 0 0 2 0 0 0 0 2 0 0 0 0 0
## 96000 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0
## 96500 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0
## 98000 0 0 0 0 0 1 3 1 1 0 1 0 0 0 0 0
## 99000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 100000 0 0 0 0 0 2 0 1 0 1 1 0 1 0 2 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 104000 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 2 0 2 3 0 1 0 1 0 0 1 0
## 106000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 108000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 112000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 115000 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 120000 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_tot
## salary 670 680 700 710 720
## 64000 0 0 0 0 0
## 77000 0 0 0 0 0
## 78256 0 0 0 0 0
## 82000 1 0 0 0 0
## 85000 0 0 1 0 1
## 86000 0 1 0 0 0
## 88000 0 0 0 0 0
## 88500 0 0 0 0 0
## 90000 0 0 0 0 0
## 92000 0 0 0 1 0
## 93000 0 0 0 0 0
## 95000 2 0 0 0 0
## 96000 0 0 0 0 0
## 96500 0 0 0 0 0
## 97000 0 0 0 0 0
## 98000 1 1 0 1 0
## 99000 0 0 0 0 0
## 100000 0 0 0 1 0
## 100400 0 0 0 0 0
## 101000 0 0 0 0 0
## 101100 0 0 0 0 0
## 101600 0 0 0 0 0
## 102500 1 0 0 0 0
## 103000 0 0 0 0 0
## 104000 0 0 0 0 0
## 105000 0 1 0 0 0
## 106000 0 2 0 0 0
## 107000 0 0 0 0 0
## 107300 0 0 0 0 0
## 107500 0 0 0 0 0
## 108000 0 0 0 0 0
## 110000 0 0 0 0 0
## 112000 1 1 0 0 0
## 115000 0 0 0 1 0
## 118000 0 0 0 0 0
## 120000 1 0 1 0 0
## 126710 0 0 0 0 0
## 130000 0 0 0 0 0
## 145800 0 0 0 0 0
## 146000 0 0 0 0 0
## 162000 0 0 1 0 0
## 220000 0 0 0 0 0
# Analyst's note on the salary-by-gmat_tot table above, echoed into the output.
# NOTE(review): the sentence is unfinished - it stops after "are"; complete it
# or remove it.
"Students with gmat_tot>=560 are "
## [1] "Students with gmat_tot>=560 are "
# Cross-tabulate starting salary (rows) against GMAT quantitative percentile
# (columns) for job.df.
xtabs(~ salary + gmat_qpc, data = job.df)
## gmat_qpc
## salary 39 43 50 52 55 56 60 64 67 68 71 72 74 75 77 78 79 81 82 83 84 85
## 64000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 90000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0
## 92000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 95000 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0
## 96000 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 96500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0
## 98000 1 0 0 0 0 0 0 0 0 0 0 2 0 1 0 0 0 0 1 1 0 0
## 99000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 100000 0 0 0 0 0 0 0 0 0 1 0 2 0 0 1 0 0 1 0 0 1 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 0 0 1 1 0 1 0 2 1 0 0 0 1 1 0 0 1 0
## 106000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 112000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 115000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 120000 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
## 126710 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_qpc
## salary 87 88 89 90 91 93 94 95 96 97 98 99
## 64000 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 1 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 1 0 0 1 0 1 0 0 0
## 86000 1 0 0 0 0 0 0 0 0 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0
## 88500 1 0 0 0 0 0 0 0 0 0 0 0
## 90000 0 0 1 0 0 0 0 0 0 0 0 0
## 92000 0 0 1 0 0 0 0 0 1 0 0 1
## 93000 0 0 1 0 0 0 0 0 0 0 0 0
## 95000 1 0 0 0 0 0 0 1 0 0 0 0
## 96000 1 0 0 0 0 0 0 0 0 0 0 0
## 96500 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0 0 0 0
## 98000 0 0 1 0 0 2 0 0 0 1 0 0
## 99000 0 0 0 0 0 0 0 0 0 0 0 0
## 100000 0 0 0 0 1 1 0 0 0 0 0 1
## 100400 0 0 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 2 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 1 0 0 0 0 0 0 0
## 101600 0 0 0 0 0 0 0 0 1 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 1 0 0
## 104000 0 0 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 1 0 0 0 0 1 0 0 0 0
## 106000 0 0 0 0 0 0 0 0 1 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 1 0 0 0 0 0 0
## 108000 0 0 0 0 0 0 0 0 0 0 0 0
## 110000 0 0 1 0 0 0 0 0 0 0 0 0
## 112000 0 0 0 0 1 0 0 0 1 0 0 0
## 115000 0 0 0 0 0 0 0 2 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 0
## 120000 1 0 0 0 0 0 0 1 0 0 0 0
## 126710 0 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 1 0 0 0 0 0 0 0 0 0
## 145800 0 1 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 1 0
## 220000 0 0 0 0 0 0 0 0 0 0 0 0
# Cross-tabulate starting salary (rows) against GMAT verbal percentile
# (columns) for job.df.
xtabs(~ salary + gmat_vpc, data = job.df)
## gmat_vpc
## salary 30 33 37 45 50 54 58 62 63 67 71 74 75 78 81 84 87 89 90 91 92 93
## 64000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 90000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1
## 92000 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 95000 0 0 0 0 0 0 0 1 0 0 2 0 0 0 2 0 0 1 0 0 0 0
## 96000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1
## 96500 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
## 98000 0 0 1 0 0 0 1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 0
## 99000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 100000 0 0 0 0 1 0 0 0 0 1 1 0 0 1 1 2 1 0 0 0 1 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 101600 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 1 1 1 0 0 0 2 1 0 0 1 1 1 0 0 1 0 0
## 106000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 112000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1
## 115000 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 120000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0
## 126710 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 220000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_vpc
## salary 95 96 97 98 99
## 64000 0 0 0 0 0
## 77000 0 0 0 1 0
## 78256 0 0 0 0 0
## 82000 1 0 0 0 0
## 85000 0 0 0 2 0
## 86000 0 1 0 0 0
## 88000 0 0 0 0 0
## 88500 0 0 0 0 0
## 90000 0 0 0 0 0
## 92000 0 1 0 0 0
## 93000 0 0 0 1 0
## 95000 0 1 0 0 0
## 96000 0 0 0 0 0
## 96500 0 0 0 0 0
## 97000 0 0 0 0 0
## 98000 0 0 0 2 0
## 99000 0 0 0 0 0
## 100000 0 0 0 0 0
## 100400 1 0 0 0 0
## 101000 0 0 0 0 0
## 101100 0 0 0 0 0
## 101600 0 0 0 0 0
## 102500 0 0 1 0 0
## 103000 0 0 0 0 0
## 104000 0 0 0 0 0
## 105000 0 1 0 0 0
## 106000 0 1 0 0 1
## 107000 0 0 0 0 0
## 107300 1 0 0 0 0
## 107500 0 0 0 0 0
## 108000 0 0 0 0 0
## 110000 0 0 0 0 0
## 112000 0 0 0 0 0
## 115000 1 0 0 1 0
## 118000 0 0 0 0 0
## 120000 2 0 0 0 0
## 126710 0 0 0 0 0
## 130000 0 0 0 0 0
## 145800 0 0 0 0 0
## 146000 1 0 0 0 0
## 162000 0 0 0 0 0
## 220000 0 0 0 0 0
# Cross-tabulate starting salary (rows) against overall GMAT percentile
# (columns) for job.df.
xtabs(~ salary + gmat_tpc, data = job.df)
## gmat_tpc
## salary 51 52 58 61 62 65 68 69 71 72 75 78 79 80 81 83 84 85 86 87 88 89
## 64000 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 90000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
## 92000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 93000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0
## 95000 0 0 0 1 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0
## 96000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0
## 96500 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0
## 98000 0 0 0 0 0 0 0 0 0 1 3 1 0 0 1 0 0 0 1 0 0 0
## 99000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 100000 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 0 0 1
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 101000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 104000 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 0 0 1 1 0 0 2 3 0 0 0 1 0 0 0 0 0 1
## 106000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 107000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 112000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 115000 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1
## 118000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 120000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0
## 126710 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_tpc
## salary 90 91 93 94 95 96 97 98 99
## 64000 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 1 0 0 0 0
## 78256 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 1 0 0 0 0
## 85000 0 0 0 1 0 0 0 1 1
## 86000 1 0 0 0 0 1 0 0 0
## 88000 0 0 1 0 0 0 0 0 0
## 88500 0 0 0 0 0 0 0 0 0
## 90000 0 0 1 0 0 0 0 0 0
## 92000 0 0 0 0 1 0 0 1 0
## 93000 0 0 0 0 0 0 0 0 0
## 95000 0 0 0 0 2 0 0 0 0
## 96000 0 0 1 0 0 0 0 0 0
## 96500 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0
## 98000 0 0 0 0 0 1 1 0 1
## 99000 0 0 0 0 0 0 0 0 0
## 100000 0 0 2 0 0 0 0 0 1
## 100400 0 0 0 0 0 0 0 0 0
## 101000 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 1 0 0 0 0
## 101600 0 1 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 1 0 0 0
## 103000 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 0 0 0 0 0
## 105000 0 0 1 0 0 1 0 0 0
## 106000 0 0 0 0 0 2 0 0 0
## 107000 0 0 0 0 0 0 0 0 0
## 107300 0 0 0 1 0 0 0 0 0
## 107500 0 1 0 0 0 0 0 0 0
## 108000 0 0 0 0 0 0 0 0 0
## 110000 0 1 0 0 0 0 0 0 0
## 112000 0 0 0 0 1 0 1 0 0
## 115000 0 0 0 0 0 0 0 1 0
## 118000 0 0 0 0 0 0 0 0 0
## 120000 0 0 0 0 1 0 0 1 0
## 126710 0 0 0 0 0 0 0 0 0
## 130000 0 0 1 0 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 0 0
## 146000 0 1 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 0 0 1 0
## 220000 0 0 0 0 0 0 0 0 0
# Cross-tabulate starting salary (rows) against first language (columns)
# for job.df.
xtabs(~ salary + frstlang, data = job.df)
## frstlang
## salary English Others
## 64000 1 0
## 77000 1 0
## 78256 1 0
## 82000 1 0
## 85000 4 0
## 86000 2 0
## 88000 1 0
## 88500 1 0
## 90000 3 0
## 92000 3 0
## 93000 3 0
## 95000 7 0
## 96000 4 0
## 96500 1 0
## 97000 2 0
## 98000 8 2
## 99000 0 1
## 100000 9 0
## 100400 1 0
## 101000 2 0
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 1 1
## 105000 11 0
## 106000 3 0
## 107000 1 0
## 107300 0 1
## 107500 1 0
## 108000 2 0
## 110000 1 0
## 112000 3 0
## 115000 5 0
## 118000 0 1
## 120000 4 0
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Analyst's reading of the salary-by-first-language table above, written as a
# bare string so that it is echoed into the knitted output.
# NOTE(review): the highest salary in that table (220000) is in the "Others"
# column - qualify the statement if needed.
"Students with English as first language are mostly preferred and get higher salaries compared to those whose first language is not English.
"
## [1] "Students with English as first language are mostly preferred and get higher salaries compared to those whose first language is not English.\n"
# Cross-tabulate starting salary (rows) against years of work experience
# (columns) for job.df.
xtabs(~ salary + work_yrs, data = job.df)
## work_yrs
## salary 0 1 2 3 4 5 6 7 8 10 15 16
## 64000 0 0 1 0 0 0 0 0 0 0 0 0
## 77000 0 0 1 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0
## 82000 0 1 0 0 0 0 0 0 0 0 0 0
## 85000 0 1 2 1 0 0 0 0 0 0 0 0
## 86000 0 0 1 1 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0
## 88500 0 0 0 1 0 0 0 0 0 0 0 0
## 90000 0 0 2 0 0 1 0 0 0 0 0 0
## 92000 0 0 3 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 0 1 1 0 0 1 0 0 0
## 95000 1 1 2 2 0 1 0 0 0 0 0 0
## 96000 0 1 2 0 1 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 1 1 0 0 0 0 0 0 0
## 98000 0 0 7 1 1 0 0 1 0 0 0 0
## 99000 0 0 0 0 0 1 0 0 0 0 0 0
## 100000 0 0 6 1 1 0 1 0 0 0 0 0
## 100400 0 0 0 1 0 0 0 0 0 0 0 0
## 101000 0 0 2 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 1 0 0 0
## 101600 0 0 0 1 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 1 0 0 0 0 0
## 103000 0 0 0 1 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 2 0 0 0 0 0 0 0
## 105000 0 0 4 4 0 1 1 0 0 0 0 1
## 106000 0 0 0 0 0 0 2 0 1 0 0 0
## 107000 0 0 1 0 0 0 0 0 0 0 0 0
## 107300 0 0 1 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 1 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 1 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0
## 112000 0 0 1 0 0 0 1 0 0 0 0 1
## 115000 0 2 0 1 2 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 1 0 0
## 120000 0 0 0 1 0 2 0 0 1 0 0 0
## 126710 0 0 0 1 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 1 0
## 162000 0 1 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 1 0
# Analyst's reading of the salary-by-work-experience table above, written as a
# bare string so that it is echoed into the knitted output.
# NOTE(review): that table lists one student with work_yrs = 0 (salary 95000),
# so re-check the "minimum of 1 years" claim.
"From the above table that a minimum of 1 years of work experience is necessary
"
## [1] "From the above table that a minimum of 1 years of work experience is necessary\n"
# Cross-tabulate starting salary (rows) against satisfaction rating (columns)
# for job.df.
xtabs(~ salary + satis, data = job.df)
## satis
## salary 3 4 5 6 7
## 64000 0 0 0 0 1
## 77000 0 0 0 1 0
## 78256 0 0 1 0 0
## 82000 0 0 0 0 1
## 85000 0 0 1 3 0
## 86000 0 0 2 0 0
## 88000 0 0 0 0 1
## 88500 0 0 0 1 0
## 90000 0 0 2 0 1
## 92000 0 0 1 1 1
## 93000 0 0 1 2 0
## 95000 1 1 1 2 2
## 96000 0 0 1 1 2
## 96500 0 0 0 1 0
## 97000 0 0 0 1 1
## 98000 0 0 2 5 3
## 99000 0 0 0 1 0
## 100000 0 0 1 6 2
## 100400 0 0 0 0 1
## 101000 0 0 1 1 0
## 101100 0 0 0 1 0
## 101600 0 0 0 1 0
## 102500 0 0 1 0 0
## 103000 0 0 0 1 0
## 104000 0 0 1 1 0
## 105000 0 0 4 6 1
## 106000 0 0 0 2 1
## 107000 0 0 1 0 0
## 107300 0 0 0 0 1
## 107500 0 0 1 0 0
## 108000 0 0 0 2 0
## 110000 0 0 1 0 0
## 112000 0 0 0 2 1
## 115000 0 0 3 2 0
## 118000 0 0 0 0 1
## 120000 0 0 2 2 0
## 126710 0 0 0 1 0
## 130000 0 0 0 0 1
## 145800 0 0 0 1 0
## 146000 0 0 0 1 0
## 162000 0 0 1 0 0
## 220000 0 0 0 1 0
# Analyst's reading of the salary-by-satisfaction table above, written as a
# bare string so that it is echoed into the knitted output.
# NOTE(review): the chi-squared test of satis against salary further down
# reports p = 0.9997 - re-check this claim against that result.
"Better satisfaction lead to better starting salaries"
## [1] "Better satisfaction lead to better starting salaries"
# Pearson chi-squared test of independence between age and salary in job.df.
# NOTE(review): both variables take many distinct values, so the contingency
# table is sparse and R warns that the chi-squared approximation may be
# incorrect - interpret the p-value with care.
chisq.test(job.df$age, job.df$salary)
## Warning in chisq.test(job.df$age, job.df$salary): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$age and job.df$salary
## X-squared = 717.62, df = 574, p-value = 3.929e-05
# Pearson chi-squared test of independence between sex and salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$sex, job.df$salary)
## Warning in chisq.test(job.df$sex, job.df$salary): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$sex and job.df$salary
## X-squared = 52.681, df = 41, p-value = 0.1045
# Pearson chi-squared test of independence between total GMAT score and
# salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$gmat_tot, job.df$salary)
## Warning in chisq.test(job.df$gmat_tot, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$gmat_tot and job.df$salary
## X-squared = 927.24, df = 820, p-value = 0.005279
# Pearson chi-squared test of independence between GMAT quantitative
# percentile and salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$gmat_qpc, job.df$salary)
## Warning in chisq.test(job.df$gmat_qpc, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$gmat_qpc and job.df$salary
## X-squared = 1464.3, df = 1353, p-value = 0.018
# Pearson chi-squared test of independence between GMAT verbal percentile and
# salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$gmat_vpc, job.df$salary)
## Warning in chisq.test(job.df$gmat_vpc, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$gmat_vpc and job.df$salary
## X-squared = 1183.3, df = 1066, p-value = 0.006802
# Pearson chi-squared test of independence between overall GMAT percentile
# and salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$gmat_tpc, job.df$salary)
## Warning in chisq.test(job.df$gmat_tpc, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$gmat_tpc and job.df$salary
## X-squared = 1422.2, df = 1230, p-value = 0.0001065
# Pearson chi-squared test of independence between s_avg and salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$s_avg, job.df$salary)
## Warning in chisq.test(job.df$s_avg, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$s_avg and job.df$salary
## X-squared = 792.97, df = 861, p-value = 0.9524
# Pearson chi-squared test of independence between f_avg and salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$f_avg, job.df$salary)
## Warning in chisq.test(job.df$f_avg, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$f_avg and job.df$salary
## X-squared = 596.28, df = 574, p-value = 0.2518
# Pearson chi-squared test of independence between years of work experience
# and salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$work_yrs, job.df$salary)
## Warning in chisq.test(job.df$work_yrs, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$work_yrs and job.df$salary
## X-squared = 535.23, df = 451, p-value = 0.003809
# Pearson chi-squared test of independence between first language and salary
# in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$frstlang, job.df$salary)
## Warning in chisq.test(job.df$frstlang, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$frstlang and job.df$salary
## X-squared = 69.847, df = 41, p-value = 0.003296
# Pearson chi-squared test of independence between satisfaction rating and
# salary in job.df.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table - interpret the p-value with care.
chisq.test(job.df$satis, job.df$salary)
## Warning in chisq.test(job.df$satis, job.df$salary): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: job.df$satis and job.df$salary
## X-squared = 109.1, df = 164, p-value = 0.9997
The results of the chi-squared tests suggest that age, the GMAT score and percentiles, work experience and first language are associated with starting salary (i.e., p < 0.05), whereas sex, the average GPA for the Spring and Fall semesters, quartile ranking and satisfaction with the degree show no significant association with salary (p > 0.05). Note, however, that every test above emitted a warning that the chi-squared approximation may be incorrect, so these p-values should be interpreted with caution. These results are also in contrast with what we observed in the plots earlier.
# Re-read the raw survey file into a second, unfiltered data frame.
mbass <- read.csv("MBA Starting Salaries Data.csv", sep = ",")

# Welch two-sample t-test of age against salary.
# NOTE(review): this compares the mean of age with the mean of salary; it does
# not measure association between the two. mbass is unfiltered, so the salary
# codes 0/998/999 are still included - confirm that both are intended.
t.test(x = mbass$age, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$age and mbass$salary
## t = -12.67, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45058.15 -32938.51
## sample estimates:
## mean of x mean of y
## 27.35766 39025.68978
# Welch two-sample t-test of sex (numeric codes) against salary on the
# unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between sex and salary - confirm intent.
t.test(x = mbass$sex, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$sex and mbass$salary
## t = -12.678, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45084.26 -32964.62
## sample estimates:
## mean of x mean of y
## 1.248175 39025.689781
# Welch two-sample t-test of total GMAT score against salary on the
# unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between gmat_tot and salary - confirm intent.
t.test(x = mbass$gmat_tot, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$gmat_tot and mbass$salary
## t = -12.477, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -44466.06 -32346.41
## sample estimates:
## mean of x mean of y
## 619.4526 39025.6898
# Welch two-sample t-test of GMAT quantitative percentile against salary on
# the unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between gmat_qpc and salary - confirm intent.
t.test(x = mbass$gmat_qpc, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$gmat_qpc and mbass$salary
## t = -12.652, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45004.87 -32885.22
## sample estimates:
## mean of x mean of y
## 80.64234 39025.68978
# Welch two-sample t-test of GMAT verbal percentile against salary on the
# unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between gmat_vpc and salary - confirm intent.
t.test(x = mbass$gmat_vpc, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$gmat_vpc and mbass$salary
## t = -12.653, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45007.19 -32887.55
## sample estimates:
## mean of x mean of y
## 78.32117 39025.68978
# Welch two-sample t-test of overall GMAT percentile against salary on the
# unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between gmat_tpc and salary - confirm intent.
t.test(x = mbass$gmat_tpc, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$gmat_tpc and mbass$salary
## t = -12.651, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45001.32 -32881.67
## sample estimates:
## mean of x mean of y
## 84.19708 39025.68978
# Welch two-sample t-test of s_avg against salary on the unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between s_avg and salary - confirm intent.
t.test(x = mbass$s_avg, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$s_avg and mbass$salary
## t = -12.678, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45082.49 -32962.84
## sample estimates:
## mean of x mean of y
## 3.025401 39025.689781
# Welch two-sample t-test of f_avg against salary on the unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between f_avg and salary - confirm intent.
t.test(x = mbass$f_avg, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$f_avg and mbass$salary
## t = -12.678, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45082.45 -32962.81
## sample estimates:
## mean of x mean of y
## 3.061533 39025.689781
# Welch two-sample t-test of years of work experience against salary on the
# unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between work_yrs and salary - confirm intent.
t.test(x = mbass$work_yrs, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$work_yrs and mbass$salary
## t = -12.677, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45081.64 -32961.99
## sample estimates:
## mean of x mean of y
## 3.872263 39025.689781
# Welch two-sample t-test of first language (numeric codes) against salary on
# the unfiltered data.
# NOTE(review): this compares the two column means; it does not measure
# association between frstlang and salary - confirm intent.
t.test(x = mbass$frstlang, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$frstlang and mbass$salary
## t = -12.678, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -45084.40 -32964.75
## sample estimates:
## mean of x mean of y
## 1.116788 39025.689781
# Welch two-sample t-test: mean of satis versus mean of salary in mbass.
# NOTE(review): a difference in means here says nothing about association.
t.test(x = mbass$satis, y = mbass$salary)
##
## Welch Two Sample t-test
##
## data: mbass$satis and mbass$salary
## t = -12.622, df = 273.03, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -44913.49 -32793.53
## sample estimates:
## mean of x mean of y
## 172.1788 39025.6898
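In all of the above t-tests, the alternative hypothesis is stated as “true difference in means is not equal to 0” and we get p < 2.2e-16. Note, however, that each test compares the mean of a predictor with the mean of salary, and the two are measured on very different scales, so the small p-values only show that these means differ; on their own they do not show that the factors influence the starting salary. This explains why the result appears to contrast with our analysis using graphs and Chi-squared tests.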
# Linear regression of salary on the four GMAT measures, fitted on job.df,
# followed by the coefficient table and fit statistics.
fit <- lm(
  formula = salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc,
  data = job.df
)
summary(fit)
##
## Call:
## lm(formula = salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc,
## data = job.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40370 -8250 -2164 5253 100097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109539.54 48054.24 2.279 0.0248 *
## gmat_tot 55.01 181.71 0.303 0.7627
## gmat_qpc 718.40 541.90 1.326 0.1880
## gmat_vpc 546.10 543.85 1.004 0.3178
## gmat_tpc -1663.16 801.57 -2.075 0.0406 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17670 on 98 degrees of freedom
## Multiple R-squared: 0.06089, Adjusted R-squared: 0.02256
## F-statistic: 1.589 on 4 and 98 DF, p-value: 0.1834
gmat_tpc is the only predictor that is significant at the 5% level in this model (p = 0.0406), although the overall F-test (p = 0.1834) is not significant. The multiple R-squared value indicates that the model accounts for about 6% of the variance in salary. The residual standard error (17670) can be thought of as the typical error in predicting salary from the available GMAT data.
# Linear regression of salary on age, sex and first language, fitted on
# job.df, followed by the coefficient table and fit statistics.
fit <- lm(
  formula = salary ~ age + sex + frstlang,
  data = job.df
)
summary(fit)
##
## Call:
## lm(formula = salary ~ age + sex + frstlang, data = job.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28723 -9214 -1296 5524 80180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39233.7 13770.5 2.849 0.00533 **
## age 2409.7 508.7 4.737 7.26e-06 ***
## sexFemale -4343.6 3391.8 -1.281 0.20333
## frstlangOthers 8541.4 6531.3 1.308 0.19398
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15480 on 99 degrees of freedom
## Multiple R-squared: 0.2714, Adjusted R-squared: 0.2493
## F-statistic: 12.29 on 3 and 99 DF, p-value: 6.687e-07
Age is a significant variable in this model. The multiple R-squared value indicates that the model accounts for about 27% of the variance in salary. The residual standard error (15480) can be thought of as the typical error in predicting salary from age, sex and first language.
# Linear regression of salary on s_avg, f_avg, work_yrs and satis, fitted on
# job.df, followed by the coefficient table and fit statistics.
fit <- lm(
  formula = salary ~ s_avg + f_avg + work_yrs + satis,
  data = job.df
)
summary(fit)
##
## Call:
## lm(formula = salary ~ s_avg + f_avg + work_yrs + satis, data = job.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33329 -7748 -853 3885 87689
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 101048.7 20095.5 5.028 2.23e-06 ***
## s_avg 1588.0 4987.7 0.318 0.751
## f_avg -1186.1 3885.5 -0.305 0.761
## work_yrs 2649.6 572.3 4.630 1.12e-05 ***
## satis -1531.7 2075.3 -0.738 0.462
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16180 on 98 degrees of freedom
## Multiple R-squared: 0.2125, Adjusted R-squared: 0.1804
## F-statistic: 6.611 on 4 and 98 DF, p-value: 9.407e-05
Work experience is a significant variable in this model. The multiple R-squared value indicates that the model accounts for about 21% of the variance in salary. The residual standard error (16180) can be thought of as the typical error in predicting salary from s_avg, f_avg, work_yrs and satis.
These regression models tell us how well each group of variables fits a straight-line equation of the form y = mx + c (y being the starting salary and x being the predictor variables). Salary depends on age, work experience and gmat_tpc, as their p-values are below 0.05.
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# Read the salary data and split it by row position: rows 1-198 are used for
# training, rows 199-274 are held out for testing.
mbasal.df <- read.csv("MBA Starting Salaries Data (1).csv", sep = ",")
train <- mbasal.df[seq_len(198), ]
test <- mbasal.df[seq(199, 274), ]

# Recode sex in the training rows only (1 -> 0, then 2 -> 1) so it can serve
# as a binomial response; `test$sex` is left with its original codes.
train$sex <- replace(train$sex, train$sex > 0 & train$sex < 2, 0)
train$sex <- replace(train$sex, train$sex > 1 & train$sex < 3, 1)

# Logistic regression of sex on every other column of the training data.
model <- glm(sex ~ ., family = binomial(link = "logit"), data = train)
summary(model)
##
## Call:
## glm(formula = sex ~ ., family = binomial(link = "logit"), data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5784 -0.7701 -0.6039 1.0382 1.9610
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.698e+00 6.582e+00 1.018 0.3088
## age -1.382e-01 1.073e-01 -1.288 0.1976
## gmat_tot -1.904e-03 1.404e-02 -0.136 0.8921
## gmat_qpc -7.837e-02 4.611e-02 -1.700 0.0892 .
## gmat_vpc -1.769e-02 4.575e-02 -0.387 0.6990
## gmat_tpc 8.354e-02 7.368e-02 1.134 0.2568
## s_avg -6.660e-01 1.383e+00 -0.482 0.6300
## f_avg 1.001e-01 4.067e-01 0.246 0.8056
## quarter -6.379e-01 5.105e-01 -1.250 0.2114
## work_yrs 5.536e-02 1.193e-01 0.464 0.6425
## frstlang 6.850e-01 6.055e-01 1.131 0.2579
## salary -2.357e-06 3.818e-06 -0.617 0.5370
## satis -6.460e-04 5.149e-04 -1.255 0.2096
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 232.04 on 197 degrees of freedom
## Residual deviance: 210.58 on 185 degrees of freedom
## AIC: 236.58
##
## Number of Fisher Scoring iterations: 5
# Analysis of deviance for the logistic model, with a chi-squared test for
# each term.
anova(object = model, test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: sex
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 197 232.04
## age 1 0.5058 196 231.53 0.47695
## gmat_tot 1 1.7178 195 229.81 0.18998
## gmat_qpc 1 10.5691 194 219.24 0.00115 **
## gmat_vpc 1 0.2286 193 219.02 0.63254
## gmat_tpc 1 1.6816 192 217.33 0.19472
## s_avg 1 2.0856 191 215.25 0.14869
## f_avg 1 0.0708 190 215.18 0.79011
## quarter 1 1.7875 189 213.39 0.18124
## work_yrs 1 0.1384 188 213.25 0.70991
## frstlang 1 0.9814 187 212.27 0.32186
## salary 1 0.0318 186 212.24 0.85853
## satis 1 1.6549 185 210.58 0.19829
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A larger p-value here indicates that the model without the variable explains more or less the same amount of variation. In this table, the lowest p-value is for gmat_qpc.
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
# Hold-out accuracy of the logistic model.
# Predict the probability of the positive class for the test rows and turn it
# into a 0/1 label with a 0.5 cut-off.
fitted.results <- predict(
  model,
  newdata = subset(test, select = 1:13),
  type = "response"
)
fitted.results <- ifelse(fitted.results > 0.5, 1, 0)

# Compare the predicted labels with the true sex labels, not with salary.
# The model was trained on sex recoded to 0/1, whereas `test$sex` still holds
# the raw 1/2 codes, so shift it by one to put both on the same coding.
# NOTE(review): the accuracy recorded in the knitted output below was computed
# against `test$salary` and must be regenerated by re-knitting.
misClasificError <- mean(fitted.results != (test$sex - 1))
print(paste('Accuracy', 1 - misClasificError))
## [1] "Accuracy 0.289473684210526"
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# ROC curve and AUC for the logistic model on the held-out rows.
# Predicted probabilities for the test rows.
p <- predict(
  model,
  newdata = subset(test, select = 1:13),
  type = "response"
)

# Pair the predictions with the true sex labels and plot the true-positive
# rate against the false-positive rate.
pr <- prediction(p, test$sex)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
plot(prf)

# Area under the ROC curve, extracted as a single number and printed.
auc <- performance(pr, measure = "auc")@y.values[[1]]
auc
## [1] 0.5230415