R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the MBA starting-salary data set from the working directory.
mba.df <- read.csv("MBA Starting Salaries Data.csv")

# Keep only the rows whose salary is not one of 998, 999 or 0
# (presumably placeholder codes rather than real salaries -- TODO confirm
# against the data dictionary). The codes are compared as numbers so the
# filter does not rely on implicit numeric-to-character coercion.
job.df <- mba.df[which(mba.df$salary != 998 &
                         mba.df$salary != 999 &
                         mba.df$salary != 0), ]

# Per-column summary of the full, unfiltered data.
summary(mba.df)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0
# Distribution of student age.
hist(
  mba.df$age,
  main = "Age Distribution",
  xlab = "Age in years",
  col = "aliceblue"
)

# Recode sex (1/2) as a labelled factor in place, then plot the level counts.
mba.df$sex <- factor(
  mba.df$sex,
  levels = c(1, 2),
  labels = c("Male", "Female")
)
plot(mba.df$sex, main = "Gender distribution", col = "antiquewhite")

# Distribution of total GMAT score.
hist(
  mba.df$gmat_tot,
  breaks = 20,
  main = "Distribution of GMAT scores",
  xlab = "GMAT total score",
  col = "antiquewhite1"
)

# Distribution of years of work experience.
hist(
  mba.df$work_yrs,
  breaks = 20,
  main = "Work Experience",
  xlab = "No. of years of work experience",
  col = "antiquewhite2"
)

# Recode first language (1/2) as a labelled factor in place, then plot counts.
mba.df$frstlang <- factor(
  mba.df$frstlang,
  levels = c(1, 2),
  labels = c("English", "Others")
)
plot(mba.df$frstlang, main = "Language Distribution", col = "antiquewhite3")

# Satisfaction histogram restricted to ratings of at most 7 (the axis label
# describes a 1-7 scale; larger values in the column are excluded).
# The bound is compared numerically: a string comparison such as `<= '7'`
# would order "10" before "7" and let such values through.
newdata <- mba.df[which(mba.df$satis <= 7), ]
hist(
  newdata$satis,
  breaks = 5,
  col = "aquamarine2",
  xlab = "Degree of Satisfaction (1=low,7=high)",
  main = "Satisfaction  distribution"
)

# Salary histogram with the 998 and 999 codes removed; rows with a salary of
# 0 are intentionally still included here, as in the original filter.
newdata1 <- mba.df[which(mba.df$salary != 998 & mba.df$salary != 999), ]
hist(
  newdata1$salary,
  breaks = 5,
  col = "aquamarine3",
  xlab = "starting salary",
  main = "Salary  distribution"
)

# Horizontal boxplots, one per numeric variable of interest.

# GMAT percentile columns.
boxplot(mba.df$gmat_qpc, main = "Quant Percentile Distribution",
        xlab = "GMAT QPC", horizontal = TRUE)

boxplot(mba.df$gmat_vpc, main = "Verbal Percentile Distribution",
        xlab = "GMAT VPC", horizontal = TRUE)

boxplot(mba.df$gmat_tpc, main = "Overall Percentile Distribution",
        xlab = "GMAT TPC", horizontal = TRUE)

# Spring and fall MBA averages.
boxplot(mba.df$s_avg, main = "Spring MBA Avg Dist.",
        xlab = "s_avg", horizontal = TRUE)

boxplot(mba.df$f_avg, main = "Fall MBA Avg Dist.",
        xlab = "f_avg", horizontal = TRUE)

# Quartile ranking: plot the `quarter` column. This call previously repeated
# `gmat_qpc`, so the plot did not match its "Quartile Ranking" title.
boxplot(mba.df$quarter, main = "Quartile Ranking Dist.",
        xlab = "Quart", horizontal = TRUE)

library(car)    

# Scatterplot matrix of salary and the other listed variables, using job.df.
pairs(
  ~ salary + sex + age + gmat_tpc + frstlang + satis + work_yrs,
  data = job.df,
  main = "Salary versus other variables"
)

# Work experience against overall GMAT percentile for all students, with the
# least-squares regression line of work_yrs on gmat_tpc overlaid in blue.
plot(
  mba.df$gmat_tpc,
  mba.df$work_yrs,
  xlab = "GMAT Percentile",
  ylab = "Work Experience",
  main = "Total GMAT Percentile vs Work Experience"
)
abline(lm(work_yrs ~ gmat_tpc, data = mba.df), col = "blue")

library(corrgram)
# Correlogram of job.df: shaded cells below the diagonal, pie glyphs above,
# variable names on the diagonal; order = TRUE lets corrgram reorder variables.
corrgram(
  job.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "MBA starting salary analysis Correlogram"
)

# Covariance matrix of the numeric columns of job.df. x and y hold the same
# column selection, so cov(x, y) is the covariance of that set with itself.
x <- job.df[, c(
  "age", "gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc",
  "s_avg", "f_avg", "work_yrs", "salary"
)]
y <- x
cov(x, y)
##                    age      gmat_tot      gmat_qpc      gmat_vpc
## age         10.7045498    -13.054445   -7.22796497  9.505045e-01
## gmat_tot   -13.0544451   2569.293737  452.14258519  6.386360e+02
## gmat_qpc    -7.2279650    452.142585  179.18027794  2.045850e+01
## gmat_vpc     0.9505045    638.636018   20.45849990  2.606602e+02
## gmat_tpc    -3.4602132    539.362269   97.03607462  1.393882e+02
## s_avg        0.1938587      3.299562    0.07838473  9.694594e-01
## f_avg       -0.3462517      3.027432    0.64252142  1.803303e-01
## work_yrs     8.6728536    -18.738816   -7.36245955 -1.366838e+00
## salary   29210.5193223 -82124.485056 3382.43784504 -3.964803e+04
##               gmat_tpc        s_avg         f_avg      work_yrs
## age      -3.460213e+00   0.19385875   -0.34625167     8.6728536
## gmat_tot  5.393623e+02   3.29956215    3.02743194   -18.7388159
## gmat_qpc  9.703607e+01   0.07838473    0.64252142    -7.3624595
## gmat_vpc  1.393882e+02   0.96945936    0.18033029    -1.3668380
## gmat_tpc  1.211342e+02   0.58062916    0.37850562    -4.3892062
## s_avg     5.806292e-01   0.14325138    0.08231046     0.1860480
## f_avg     3.785056e-01   0.08231046    0.23786375    -0.3176271
## work_yrs -4.389206e+00   0.18604797   -0.31762707     9.0630116
## salary   -2.596339e+04 688.02042071 -924.11288026 24458.1995050
##                 salary
## age       2.921052e+04
## gmat_tot -8.212449e+04
## gmat_qpc  3.382438e+03
## gmat_vpc -3.964803e+04
## gmat_tpc -2.596339e+04
## s_avg     6.880204e+02
## f_avg    -9.241129e+02
## work_yrs  2.445820e+04
## salary    3.192940e+08
# Rebuild job.df from the current mba.df so it picks up the labelled `sex`
# and `frstlang` factors. Rows whose salary is 998, 999 or 0 are dropped;
# the codes are compared as numbers rather than as strings.
job.df <- mba.df[which(mba.df$salary != 998 &
                         mba.df$salary != 999 &
                         mba.df$salary != 0), ]

# Count of students for each salary / age combination.
xtabs(~ salary + age, data = job.df)
##         age
## salary   22 23 24 25 26 27 28 29 30 31 32 33 34 39 40
##   64000   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   77000   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   78256   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   85000   1  0  0  1  1  1  0  0  0  0  0  0  0  0  0
##   86000   0  0  0  1  1  0  0  0  0  0  0  0  0  0  0
##   88000   0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   88500   0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   90000   0  0  0  2  0  1  0  0  0  0  0  0  0  0  0
##   92000   0  0  0  2  0  1  0  0  0  0  0  0  0  0  0
##   93000   0  0  0  1  0  0  1  0  0  1  0  0  0  0  0
##   95000   0  0  1  5  0  0  0  1  0  0  0  0  0  0  0
##   96000   0  0  1  1  2  0  0  0  0  0  0  0  0  0  0
##   96500   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  0  0  1  1  0  0  0  0  0  0  0  0
##   98000   0  1  3  2  1  1  1  1  0  0  0  0  0  0  0
##   99000   0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
##   100000  0  1  4  1  1  1  0  0  0  1  0  0  0  0  0
##   100400  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   101000  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   101600  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0
##   103000  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0
##   105000  0  1  1  2  3  1  0  0  1  1  0  0  1  0  0
##   106000  0  0  0  0  0  0  0  1  2  0  0  0  0  0  0
##   107000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   107300  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
##   107500  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0
##   110000  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
##   112000  0  0  1  0  0  0  0  1  0  0  0  0  0  1  0
##   115000  0  0  1  1  0  3  0  0  0  0  0  0  0  0  0
##   118000  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0
##   120000  0  0  0  0  0  1  1  0  2  0  0  0  0  0  0
##   126710  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   145800  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
##   162000  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   220000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
# Interpretation of the salary-by-age table, written as a bare string so that
# it is echoed into the knitted output.
"age group of 24-30 were more placed than the others"
## [1] "age group of 24-30 were more placed than the others"
# Count of students for each salary / sex combination in job.df.
xtabs(~ salary + sex, data = job.df)
##         sex
## salary   Male Female
##   64000     0      1
##   77000     1      0
##   78256     0      1
##   82000     0      1
##   85000     1      3
##   86000     0      2
##   88000     0      1
##   88500     1      0
##   90000     3      0
##   92000     2      1
##   93000     2      1
##   95000     4      3
##   96000     3      1
##   96500     1      0
##   97000     2      0
##   98000     6      4
##   99000     0      1
##   100000    4      5
##   100400    1      0
##   101000    0      2
##   101100    1      0
##   101600    1      0
##   102500    1      0
##   103000    1      0
##   104000    2      0
##   105000   11      0
##   106000    2      1
##   107000    1      0
##   107300    1      0
##   107500    1      0
##   108000    2      0
##   110000    0      1
##   112000    3      0
##   115000    5      0
##   118000    1      0
##   120000    3      1
##   126710    1      0
##   130000    1      0
##   145800    1      0
##   146000    1      0
##   162000    1      0
##   220000    0      1
# Interpretation of the salary-by-sex table, written as a bare string so that
# it is echoed into the knitted output (the string ends with a newline).
"From this table we see that most higher starting salaries have been awarded to men.
"
## [1] "From this table we see that most higher starting salaries have been awarded to men.\n"
# Count of students for each salary / total GMAT score combination in job.df.
xtabs(~ salary + gmat_tot, data = job.df)
##         gmat_tot
## salary   500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
##   64000    0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
##   77000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   78256    0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   85000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   86000    0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   88000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   88500    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   90000    0   0   0   0   0   0   0   1   0   0   0   0   1   0   1   0
##   92000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   93000    0   0   0   1   0   0   0   0   0   0   1   1   0   0   0   0
##   95000    0   0   1   0   0   2   0   0   0   0   2   0   0   0   0   0
##   96000    0   0   0   0   0   1   0   0   1   1   0   0   0   0   1   0
##   96500    1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   1   0   0   0   1   0   0   0   0
##   98000    0   0   0   0   0   1   3   1   1   0   1   0   0   0   0   0
##   99000    0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
##   100000   0   0   0   0   0   2   0   1   0   1   1   0   1   0   2   0
##   100400   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   101000   0   0   0   0   0   0   0   0   0   1   0   1   0   0   0   0
##   101100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   101600   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   102500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   104000   0   0   1   0   0   1   0   0   0   0   0   0   0   0   0   0
##   105000   0   0   0   0   2   0   2   3   0   1   0   1   0   0   1   0
##   106000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   107000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   107300   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   107500   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   108000   0   0   0   0   0   0   1   0   0   1   0   0   0   0   0   0
##   110000   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   112000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   115000   0   0   0   1   0   0   1   0   0   0   0   1   1   0   0   0
##   118000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   120000   0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0
##   126710   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
##   130000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   145800   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   146000   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   162000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   220000   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##         gmat_tot
## salary   670 680 700 710 720
##   64000    0   0   0   0   0
##   77000    0   0   0   0   0
##   78256    0   0   0   0   0
##   82000    1   0   0   0   0
##   85000    0   0   1   0   1
##   86000    0   1   0   0   0
##   88000    0   0   0   0   0
##   88500    0   0   0   0   0
##   90000    0   0   0   0   0
##   92000    0   0   0   1   0
##   93000    0   0   0   0   0
##   95000    2   0   0   0   0
##   96000    0   0   0   0   0
##   96500    0   0   0   0   0
##   97000    0   0   0   0   0
##   98000    1   1   0   1   0
##   99000    0   0   0   0   0
##   100000   0   0   0   1   0
##   100400   0   0   0   0   0
##   101000   0   0   0   0   0
##   101100   0   0   0   0   0
##   101600   0   0   0   0   0
##   102500   1   0   0   0   0
##   103000   0   0   0   0   0
##   104000   0   0   0   0   0
##   105000   0   1   0   0   0
##   106000   0   2   0   0   0
##   107000   0   0   0   0   0
##   107300   0   0   0   0   0
##   107500   0   0   0   0   0
##   108000   0   0   0   0   0
##   110000   0   0   0   0   0
##   112000   1   1   0   0   0
##   115000   0   0   0   1   0
##   118000   0   0   0   0   0
##   120000   1   0   1   0   0
##   126710   0   0   0   0   0
##   130000   0   0   0   0   0
##   145800   0   0   0   0   0
##   146000   0   0   0   0   0
##   162000   0   0   1   0   0
##   220000   0   0   0   0   0
# Interpretation of the salary-by-GMAT-total table, echoed as a bare string.
# NOTE(review): the sentence below stops after "are " and appears unfinished;
# the intended conclusion should be filled in by the author.
"Students with gmat_tot>=560 are "
## [1] "Students with gmat_tot>=560 are "
# Count of students for each salary / GMAT quant percentile combination.
xtabs(~ salary + gmat_qpc, data = job.df)
##         gmat_qpc
## salary   39 43 50 52 55 56 60 64 67 68 71 72 74 75 77 78 79 81 82 83 84 85
##   64000   0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   77000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
##   78256   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   85000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0
##   86000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
##   88000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0
##   88500   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   90000   0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0  0
##   92000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   93000   0  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   95000   0  0  1  1  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  1  1  0
##   96000   0  0  0  1  0  1  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0
##   96500   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0
##   97000   0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0  0  0  0  0
##   98000   1  0  0  0  0  0  0  0  0  0  0  2  0  1  0  0  0  0  1  1  0  0
##   99000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   100000  0  0  0  0  0  0  0  0  0  1  0  2  0  0  1  0  0  1  0  0  1  0
##   100400  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   101000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101600  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   103000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  0  0  0  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0
##   105000  0  0  0  0  0  0  1  1  0  1  0  2  1  0  0  0  1  1  0  0  1  0
##   106000  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0
##   107000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0
##   107300  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   107500  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0
##   110000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   112000  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   115000  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0  0  0  1  0
##   118000  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   120000  0  0  0  0  0  0  0  0  1  0  0  0  0  0  1  0  0  0  0  0  0  0
##   126710  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   145800  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   162000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   220000  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##         gmat_qpc
## salary   87 88 89 90 91 93 94 95 96 97 98 99
##   64000   0  0  0  0  0  0  0  0  0  0  0  0
##   77000   0  0  0  0  0  0  0  0  0  0  0  0
##   78256   0  0  0  0  0  0  0  0  0  0  0  0
##   82000   1  0  0  0  0  0  0  0  0  0  0  0
##   85000   0  0  0  1  0  0  1  0  1  0  0  0
##   86000   1  0  0  0  0  0  0  0  0  0  0  0
##   88000   0  0  0  0  0  0  0  0  0  0  0  0
##   88500   1  0  0  0  0  0  0  0  0  0  0  0
##   90000   0  0  1  0  0  0  0  0  0  0  0  0
##   92000   0  0  1  0  0  0  0  0  1  0  0  1
##   93000   0  0  1  0  0  0  0  0  0  0  0  0
##   95000   1  0  0  0  0  0  0  1  0  0  0  0
##   96000   1  0  0  0  0  0  0  0  0  0  0  0
##   96500   0  0  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  0  0  0  0  0  0  0  0  0
##   98000   0  0  1  0  0  2  0  0  0  1  0  0
##   99000   0  0  0  0  0  0  0  0  0  0  0  0
##   100000  0  0  0  0  1  1  0  0  0  0  0  1
##   100400  0  0  0  0  0  0  0  0  0  0  0  0
##   101000  0  0  2  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  1  0  0  0  0  0  0  0
##   101600  0  0  0  0  0  0  0  0  1  0  0  0
##   102500  0  0  0  0  0  0  0  0  0  0  0  0
##   103000  0  0  0  0  0  0  0  0  0  1  0  0
##   104000  0  0  0  0  0  0  0  0  0  0  0  0
##   105000  0  0  1  0  0  0  0  1  0  0  0  0
##   106000  0  0  0  0  0  0  0  0  1  0  0  0
##   107000  0  0  0  0  0  0  0  0  0  0  0  0
##   107300  0  0  0  0  0  0  0  0  0  0  0  0
##   107500  0  0  0  0  0  1  0  0  0  0  0  0
##   108000  0  0  0  0  0  0  0  0  0  0  0  0
##   110000  0  0  1  0  0  0  0  0  0  0  0  0
##   112000  0  0  0  0  1  0  0  0  1  0  0  0
##   115000  0  0  0  0  0  0  0  2  0  0  0  0
##   118000  0  0  0  0  0  0  0  0  0  0  0  0
##   120000  1  0  0  0  0  0  0  1  0  0  0  0
##   126710  0  0  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  1  0  0  0  0  0  0  0  0  0
##   145800  0  1  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  0
##   162000  0  0  0  0  0  0  0  0  0  0  1  0
##   220000  0  0  0  0  0  0  0  0  0  0  0  0
# Count of students for each salary / GMAT verbal percentile combination.
xtabs(~ salary + gmat_vpc, data = job.df)
##         gmat_vpc
## salary   30 33 37 45 50 54 58 62 63 67 71 74 75 78 81 84 87 89 90 91 92 93
##   64000   0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
##   77000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   78256   0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   85000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  1  0
##   86000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   88000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   88500   0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   90000   0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  1  0  0  0  1
##   92000   0  0  0  0  0  0  0  0  0  0  1  1  0  0  0  0  0  0  0  0  0  0
##   93000   0  0  0  0  0  0  0  1  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   95000   0  0  0  0  0  0  0  1  0  0  2  0  0  0  2  0  0  1  0  0  0  0
##   96000   0  0  0  0  0  0  0  1  0  0  0  0  0  0  1  0  0  1  0  0  0  1
##   96500   1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  1  0  0  0  0  0
##   98000   0  0  1  0  0  0  1  1  0  1  1  0  0  0  1  0  0  0  1  1  0  0
##   99000   0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   100000  0  0  0  0  1  0  0  0  0  1  1  0  0  1  1  2  1  0  0  0  1  0
##   100400  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101000  0  0  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0
##   101600  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   103000  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   105000  0  0  0  0  1  1  1  0  0  0  2  1  0  0  1  1  1  0  0  1  0  0
##   106000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0
##   107000  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   107300  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   107500  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  1  0  0  0  0  0  0
##   110000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   112000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  1  0  0  0  0  1
##   115000  0  1  0  1  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   118000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
##   120000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0  0  0  0  0
##   126710  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0
##   145800  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   162000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
##   220000  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##         gmat_vpc
## salary   95 96 97 98 99
##   64000   0  0  0  0  0
##   77000   0  0  0  1  0
##   78256   0  0  0  0  0
##   82000   1  0  0  0  0
##   85000   0  0  0  2  0
##   86000   0  1  0  0  0
##   88000   0  0  0  0  0
##   88500   0  0  0  0  0
##   90000   0  0  0  0  0
##   92000   0  1  0  0  0
##   93000   0  0  0  1  0
##   95000   0  1  0  0  0
##   96000   0  0  0  0  0
##   96500   0  0  0  0  0
##   97000   0  0  0  0  0
##   98000   0  0  0  2  0
##   99000   0  0  0  0  0
##   100000  0  0  0  0  0
##   100400  1  0  0  0  0
##   101000  0  0  0  0  0
##   101100  0  0  0  0  0
##   101600  0  0  0  0  0
##   102500  0  0  1  0  0
##   103000  0  0  0  0  0
##   104000  0  0  0  0  0
##   105000  0  1  0  0  0
##   106000  0  1  0  0  1
##   107000  0  0  0  0  0
##   107300  1  0  0  0  0
##   107500  0  0  0  0  0
##   108000  0  0  0  0  0
##   110000  0  0  0  0  0
##   112000  0  0  0  0  0
##   115000  1  0  0  1  0
##   118000  0  0  0  0  0
##   120000  2  0  0  0  0
##   126710  0  0  0  0  0
##   130000  0  0  0  0  0
##   145800  0  0  0  0  0
##   146000  1  0  0  0  0
##   162000  0  0  0  0  0
##   220000  0  0  0  0  0
# Count of students for each salary / overall GMAT percentile combination.
xtabs(~ salary + gmat_tpc, data = job.df)
##         gmat_tpc
## salary   51 52 58 61 62 65 68 69 71 72 75 78 79 80 81 83 84 85 86 87 88 89
##   64000   0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   77000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   78256   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   85000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
##   86000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   88000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   88500   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   90000   0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  1
##   92000   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   93000   0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  2  0  0
##   95000   0  0  0  1  0  0  0  0  0  2  0  0  0  0  0  0  0  0  2  0  0  0
##   96000   0  0  0  0  0  0  0  0  0  1  0  0  0  0  1  1  0  0  0  0  0  0
##   96500   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  1  0
##   98000   0  0  0  0  0  0  0  0  0  1  3  1  0  0  1  0  0  0  1  0  0  0
##   99000   0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   100000  0  0  0  0  0  0  0  0  1  1  0  1  0  0  0  0  1  0  1  0  0  1
##   100400  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
##   101000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  1  0  0
##   101100  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101600  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   103000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0
##   104000  0  0  0  0  1  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   105000  0  0  0  0  0  0  1  1  0  0  2  3  0  0  0  1  0  0  0  0  0  1
##   106000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   107000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0
##   107300  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   107500  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0  0  0  0  0  0
##   110000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   112000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0
##   115000  0  0  0  0  0  1  0  0  0  0  1  0  0  0  0  0  0  0  0  1  0  1
##   118000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   120000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0  0  0  0
##   126710  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   145800  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   162000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   220000  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##         gmat_tpc
## salary   90 91 93 94 95 96 97 98 99
##   64000   0  0  0  0  0  0  0  0  0
##   77000   0  0  0  0  1  0  0  0  0
##   78256   0  0  0  0  0  0  0  0  0
##   82000   0  0  0  0  1  0  0  0  0
##   85000   0  0  0  1  0  0  0  1  1
##   86000   1  0  0  0  0  1  0  0  0
##   88000   0  0  1  0  0  0  0  0  0
##   88500   0  0  0  0  0  0  0  0  0
##   90000   0  0  1  0  0  0  0  0  0
##   92000   0  0  0  0  1  0  0  1  0
##   93000   0  0  0  0  0  0  0  0  0
##   95000   0  0  0  0  2  0  0  0  0
##   96000   0  0  1  0  0  0  0  0  0
##   96500   0  0  0  0  0  0  0  0  0
##   97000   0  0  0  0  0  0  0  0  0
##   98000   0  0  0  0  0  1  1  0  1
##   99000   0  0  0  0  0  0  0  0  0
##   100000  0  0  2  0  0  0  0  0  1
##   100400  0  0  0  0  0  0  0  0  0
##   101000  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  1  0  0  0  0
##   101600  0  1  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  1  0  0  0
##   103000  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  0  0  0  0  0  0
##   105000  0  0  1  0  0  1  0  0  0
##   106000  0  0  0  0  0  2  0  0  0
##   107000  0  0  0  0  0  0  0  0  0
##   107300  0  0  0  1  0  0  0  0  0
##   107500  0  1  0  0  0  0  0  0  0
##   108000  0  0  0  0  0  0  0  0  0
##   110000  0  1  0  0  0  0  0  0  0
##   112000  0  0  0  0  1  0  1  0  0
##   115000  0  0  0  0  0  0  0  1  0
##   118000  0  0  0  0  0  0  0  0  0
##   120000  0  0  0  0  1  0  0  1  0
##   126710  0  0  0  0  0  0  0  0  0
##   130000  0  0  1  0  0  0  0  0  0
##   145800  0  0  0  0  0  0  0  0  0
##   146000  0  1  0  0  0  0  0  0  0
##   162000  0  0  0  0  0  0  0  1  0
##   220000  0  0  0  0  0  0  0  0  0
# Count of students for each salary / first-language combination in job.df.
xtabs(~ salary + frstlang, data = job.df)
##         frstlang
## salary   English Others
##   64000        1      0
##   77000        1      0
##   78256        1      0
##   82000        1      0
##   85000        4      0
##   86000        2      0
##   88000        1      0
##   88500        1      0
##   90000        3      0
##   92000        3      0
##   93000        3      0
##   95000        7      0
##   96000        4      0
##   96500        1      0
##   97000        2      0
##   98000        8      2
##   99000        0      1
##   100000       9      0
##   100400       1      0
##   101000       2      0
##   101100       1      0
##   101600       1      0
##   102500       1      0
##   103000       1      0
##   104000       1      1
##   105000      11      0
##   106000       3      0
##   107000       1      0
##   107300       0      1
##   107500       1      0
##   108000       2      0
##   110000       1      0
##   112000       3      0
##   115000       5      0
##   118000       0      1
##   120000       4      0
##   126710       1      0
##   130000       1      0
##   145800       1      0
##   146000       1      0
##   162000       1      0
##   220000       0      1
# Interpretation of the salary-by-first-language table, echoed as a bare
# string (the string ends with a newline).
# NOTE(review): the "Others" column holds only a handful of students and
# includes the single highest salary (220000) -- confirm the conclusion holds.
"Students with English as first language are mostly preferred and get higher salaries compared to those whose first language is not English.
"
## [1] "Students with English as first language are mostly preferred and get higher salaries compared to those whose first language is not English.\n"
# Count of students for each salary / years-of-work-experience combination.
xtabs(~ salary + work_yrs, data = job.df)
##         work_yrs
## salary   0 1 2 3 4 5 6 7 8 10 15 16
##   64000  0 0 1 0 0 0 0 0 0  0  0  0
##   77000  0 0 1 0 0 0 0 0 0  0  0  0
##   78256  0 1 0 0 0 0 0 0 0  0  0  0
##   82000  0 1 0 0 0 0 0 0 0  0  0  0
##   85000  0 1 2 1 0 0 0 0 0  0  0  0
##   86000  0 0 1 1 0 0 0 0 0  0  0  0
##   88000  0 0 0 1 0 0 0 0 0  0  0  0
##   88500  0 0 0 1 0 0 0 0 0  0  0  0
##   90000  0 0 2 0 0 1 0 0 0  0  0  0
##   92000  0 0 3 0 0 0 0 0 0  0  0  0
##   93000  0 0 0 0 1 1 0 0 1  0  0  0
##   95000  1 1 2 2 0 1 0 0 0  0  0  0
##   96000  0 1 2 0 1 0 0 0 0  0  0  0
##   96500  0 0 1 0 0 0 0 0 0  0  0  0
##   97000  0 0 0 1 1 0 0 0 0  0  0  0
##   98000  0 0 7 1 1 0 0 1 0  0  0  0
##   99000  0 0 0 0 0 1 0 0 0  0  0  0
##   100000 0 0 6 1 1 0 1 0 0  0  0  0
##   100400 0 0 0 1 0 0 0 0 0  0  0  0
##   101000 0 0 2 0 0 0 0 0 0  0  0  0
##   101100 0 0 0 0 0 0 0 0 1  0  0  0
##   101600 0 0 0 1 0 0 0 0 0  0  0  0
##   102500 0 0 0 0 0 0 1 0 0  0  0  0
##   103000 0 0 0 1 0 0 0 0 0  0  0  0
##   104000 0 0 0 0 2 0 0 0 0  0  0  0
##   105000 0 0 4 4 0 1 1 0 0  0  0  1
##   106000 0 0 0 0 0 0 2 0 1  0  0  0
##   107000 0 0 1 0 0 0 0 0 0  0  0  0
##   107300 0 0 1 0 0 0 0 0 0  0  0  0
##   107500 0 0 0 1 0 0 0 0 0  0  0  0
##   108000 0 0 0 1 1 0 0 0 0  0  0  0
##   110000 0 0 0 0 0 0 1 0 0  0  0  0
##   112000 0 0 1 0 0 0 1 0 0  0  0  1
##   115000 0 2 0 1 2 0 0 0 0  0  0  0
##   118000 0 0 0 0 0 0 0 0 0  1  0  0
##   120000 0 0 0 1 0 2 0 0 1  0  0  0
##   126710 0 0 0 1 0 0 0 0 0  0  0  0
##   130000 0 0 0 0 1 0 0 0 0  0  0  0
##   145800 0 0 1 0 0 0 0 0 0  0  0  0
##   146000 0 0 0 0 0 0 0 0 0  0  1  0
##   162000 0 1 0 0 0 0 0 0 0  0  0  0
##   220000 0 0 0 0 0 0 0 0 0  0  1  0
"From the above table that a minimum of 1 years of work experience is necessary
"
## [1] "From the above table that a minimum of 1 years of work experience is necessary\n"
xtabs(~salary+satis,data=job.df)
##         satis
## salary   3 4 5 6 7
##   64000  0 0 0 0 1
##   77000  0 0 0 1 0
##   78256  0 0 1 0 0
##   82000  0 0 0 0 1
##   85000  0 0 1 3 0
##   86000  0 0 2 0 0
##   88000  0 0 0 0 1
##   88500  0 0 0 1 0
##   90000  0 0 2 0 1
##   92000  0 0 1 1 1
##   93000  0 0 1 2 0
##   95000  1 1 1 2 2
##   96000  0 0 1 1 2
##   96500  0 0 0 1 0
##   97000  0 0 0 1 1
##   98000  0 0 2 5 3
##   99000  0 0 0 1 0
##   100000 0 0 1 6 2
##   100400 0 0 0 0 1
##   101000 0 0 1 1 0
##   101100 0 0 0 1 0
##   101600 0 0 0 1 0
##   102500 0 0 1 0 0
##   103000 0 0 0 1 0
##   104000 0 0 1 1 0
##   105000 0 0 4 6 1
##   106000 0 0 0 2 1
##   107000 0 0 1 0 0
##   107300 0 0 0 0 1
##   107500 0 0 1 0 0
##   108000 0 0 0 2 0
##   110000 0 0 1 0 0
##   112000 0 0 0 2 1
##   115000 0 0 3 2 0
##   118000 0 0 0 0 1
##   120000 0 0 2 2 0
##   126710 0 0 0 1 0
##   130000 0 0 0 0 1
##   145800 0 0 0 1 0
##   146000 0 0 0 1 0
##   162000 0 0 1 0 0
##   220000 0 0 0 1 0
"Better satisfaction lead to better starting salaries"
## [1] "Better satisfaction lead to better starting salaries"
# Pearson chi-squared test on the cross-tabulation of age and salary.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this call; with 574 degrees of freedom the table is presumably very
# sparse, so the p-value should be read with caution.
chisq.test(job.df$age, job.df$salary)
## Warning in chisq.test(job.df$age, job.df$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$age and job.df$salary
## X-squared = 717.62, df = 574, p-value = 3.929e-05
chisq.test(job.df$sex,job.df$salary)
## Warning in chisq.test(job.df$sex, job.df$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$sex and job.df$salary
## X-squared = 52.681, df = 41, p-value = 0.1045
chisq.test(job.df$gmat_tot,job.df$salary)
## Warning in chisq.test(job.df$gmat_tot, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$gmat_tot and job.df$salary
## X-squared = 927.24, df = 820, p-value = 0.005279
chisq.test(job.df$gmat_qpc,job.df$salary)
## Warning in chisq.test(job.df$gmat_qpc, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$gmat_qpc and job.df$salary
## X-squared = 1464.3, df = 1353, p-value = 0.018
chisq.test(job.df$gmat_vpc,job.df$salary)
## Warning in chisq.test(job.df$gmat_vpc, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$gmat_vpc and job.df$salary
## X-squared = 1183.3, df = 1066, p-value = 0.006802
chisq.test(job.df$gmat_tpc,job.df$salary)
## Warning in chisq.test(job.df$gmat_tpc, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$gmat_tpc and job.df$salary
## X-squared = 1422.2, df = 1230, p-value = 0.0001065
chisq.test(job.df$s_avg,job.df$salary)
## Warning in chisq.test(job.df$s_avg, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$s_avg and job.df$salary
## X-squared = 792.97, df = 861, p-value = 0.9524
chisq.test(job.df$f_avg,job.df$salary)
## Warning in chisq.test(job.df$f_avg, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$f_avg and job.df$salary
## X-squared = 596.28, df = 574, p-value = 0.2518
chisq.test(job.df$work_yrs,job.df$salary)
## Warning in chisq.test(job.df$work_yrs, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$work_yrs and job.df$salary
## X-squared = 535.23, df = 451, p-value = 0.003809
chisq.test(job.df$frstlang,job.df$salary)
## Warning in chisq.test(job.df$frstlang, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$frstlang and job.df$salary
## X-squared = 69.847, df = 41, p-value = 0.003296
chisq.test(job.df$satis,job.df$salary)
## Warning in chisq.test(job.df$satis, job.df$salary): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job.df$satis and job.df$salary
## X-squared = 109.1, df = 164, p-value = 0.9997

The results of the chi-squared tests suggest that age, GMAT scores and percentiles, work experience and first language are associated with starting salary (i.e., p < 0.05), whereas sex, average GPA for the Spring and Fall semesters, quartile ranking and satisfaction with the degree show no significant association with salary (p > 0.05). Note that every test above warns that the chi-squared approximation may be incorrect, so these p-values should be read with caution. This, however, is in contrast with the results obtained from the plots that we observed earlier.

# Reload the survey file under a separate name for the t-tests.
# NOTE(review): this is the unfiltered file, so salary still contains the
# 0 / 998 / 999 values that job.df excludes -- confirm that is intended.
mbass <- read.csv("MBA Starting Salaries Data.csv", sep = ",")
# Welch two-sample t-test between the age column and the salary column,
# i.e. it compares the mean of age with the mean of salary.
t.test(mbass$age, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$age and mbass$salary
## t = -12.67, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45058.15 -32938.51
## sample estimates:
##   mean of x   mean of y 
##    27.35766 39025.68978
t.test(mbass$sex, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$sex and mbass$salary
## t = -12.678, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45084.26 -32964.62
## sample estimates:
##    mean of x    mean of y 
##     1.248175 39025.689781
t.test(mbass$gmat_tot, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$gmat_tot and mbass$salary
## t = -12.477, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -44466.06 -32346.41
## sample estimates:
##  mean of x  mean of y 
##   619.4526 39025.6898
t.test(mbass$gmat_qpc, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$gmat_qpc and mbass$salary
## t = -12.652, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45004.87 -32885.22
## sample estimates:
##   mean of x   mean of y 
##    80.64234 39025.68978
t.test(mbass$gmat_vpc, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$gmat_vpc and mbass$salary
## t = -12.653, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45007.19 -32887.55
## sample estimates:
##   mean of x   mean of y 
##    78.32117 39025.68978
t.test(mbass$gmat_tpc, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$gmat_tpc and mbass$salary
## t = -12.651, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45001.32 -32881.67
## sample estimates:
##   mean of x   mean of y 
##    84.19708 39025.68978
t.test(mbass$s_avg, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$s_avg and mbass$salary
## t = -12.678, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45082.49 -32962.84
## sample estimates:
##    mean of x    mean of y 
##     3.025401 39025.689781
t.test(mbass$f_avg, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$f_avg and mbass$salary
## t = -12.678, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45082.45 -32962.81
## sample estimates:
##    mean of x    mean of y 
##     3.061533 39025.689781
t.test(mbass$work_yrs, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$work_yrs and mbass$salary
## t = -12.677, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45081.64 -32961.99
## sample estimates:
##    mean of x    mean of y 
##     3.872263 39025.689781
t.test(mbass$frstlang, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$frstlang and mbass$salary
## t = -12.678, df = 273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -45084.40 -32964.75
## sample estimates:
##    mean of x    mean of y 
##     1.116788 39025.689781
t.test(mbass$satis, mbass$salary)
## 
##  Welch Two Sample t-test
## 
## data:  mbass$satis and mbass$salary
## t = -12.622, df = 273.03, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -44913.49 -32793.53
## sample estimates:
##  mean of x  mean of y 
##   172.1788 39025.6898

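In all of the above t-tests the alternative hypothesis is stated as “true difference in means is not equal to 0” and we get p < 2.2e-16, which we take to mean that all the factors somehow influence the starting salary. Note, however, that each test compares the mean of a predictor with the mean of salary (for example, 27.36 for age against 39025.69 for salary), so strictly a small p-value only shows that these two means differ. This is in contrast to our analysis using graphs as well as the chi-squared tests.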

# Linear model on job.df: salary regressed on the GMAT total score and the
# quantitative, verbal and overall GMAT percentiles.
fit <- lm(
  salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc,
  data = job.df
)
summary(fit)
## 
## Call:
## lm(formula = salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc, 
##     data = job.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -40370  -8250  -2164   5253 100097 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 109539.54   48054.24   2.279   0.0248 *
## gmat_tot        55.01     181.71   0.303   0.7627  
## gmat_qpc       718.40     541.90   1.326   0.1880  
## gmat_vpc       546.10     543.85   1.004   0.3178  
## gmat_tpc     -1663.16     801.57  -2.075   0.0406 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17670 on 98 degrees of freedom
## Multiple R-squared:  0.06089,    Adjusted R-squared:  0.02256 
## F-statistic: 1.589 on 4 and 98 DF,  p-value: 0.1834

gmat_tpc is the only significant variable in this model (p = 0.0406). The multiple R-squared value indicates that the model accounts for about 6% of the variance in salary. The residual standard error (17670) can be thought of as the average error in predicting salary using the various GMAT data available. Note that the overall F-test for this model is not significant (p-value 0.1834).

# Linear model on job.df: salary regressed on age, sex and first language.
fit <- lm(
  salary ~ age + sex + frstlang,
  data = job.df
)
summary(fit)
## 
## Call:
## lm(formula = salary ~ age + sex + frstlang, data = job.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28723  -9214  -1296   5524  80180 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     39233.7    13770.5   2.849  0.00533 ** 
## age              2409.7      508.7   4.737 7.26e-06 ***
## sexFemale       -4343.6     3391.8  -1.281  0.20333    
## frstlangOthers   8541.4     6531.3   1.308  0.19398    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15480 on 99 degrees of freedom
## Multiple R-squared:  0.2714, Adjusted R-squared:  0.2493 
## F-statistic: 12.29 on 3 and 99 DF,  p-value: 6.687e-07

Age is a significant variable in this model. The multiple R-squared value indicates that the model accounts for about 27% of the variance in salary. The residual standard error (15480) can be thought of as the average error in predicting salary from age, sex and first language.

# Linear model on job.df: salary regressed on the spring and fall GPA
# averages, years of work experience and satisfaction score.
fit <- lm(
  salary ~ s_avg + f_avg + work_yrs + satis,
  data = job.df
)
summary(fit)
## 
## Call:
## lm(formula = salary ~ s_avg + f_avg + work_yrs + satis, data = job.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33329  -7748   -853   3885  87689 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 101048.7    20095.5   5.028 2.23e-06 ***
## s_avg         1588.0     4987.7   0.318    0.751    
## f_avg        -1186.1     3885.5  -0.305    0.761    
## work_yrs      2649.6      572.3   4.630 1.12e-05 ***
## satis        -1531.7     2075.3  -0.738    0.462    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16180 on 98 degrees of freedom
## Multiple R-squared:  0.2125, Adjusted R-squared:  0.1804 
## F-statistic: 6.611 on 4 and 98 DF,  p-value: 9.407e-05

Work experience is a significant variable in this model. The multiple R-squared value indicates that the model accounts for about 21% of the variance in salary. The residual standard error (16180) can be thought of as the average error in predicting salary from the semester GPA averages, work experience and satisfaction.

These regression models tell us how well each variable fits a straight-line equation of the form y = mx + c (y being salary and x being the variable) with respect to starting salary. Salary depends on age, work experience and gmat_tpc, as their p-values are below 0.05.

library(Amelia)
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# Logistic regression of sex on every other column. The model is fitted on
# rows 1-198; rows 199-274 are set aside as the test set.
mbasal.df <- read.csv("MBA Starting Salaries Data (1).csv", sep = ",")
train <- mbasal.df[seq_len(198), ]
test <- mbasal.df[seq(199, 274), ]
# Recode sex in the training rows only: values in (0, 2) become 0, values in
# [2, 3) become 1, anything else is left as it is.
train$sex <- with(
  train,
  ifelse(sex > 0 & sex < 2, 0, ifelse(sex >= 2 & sex < 3, 1, sex))
)
model <- glm(sex ~ ., family = binomial(link = "logit"), data = train)
summary(model)
## 
## Call:
## glm(formula = sex ~ ., family = binomial(link = "logit"), data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5784  -0.7701  -0.6039   1.0382   1.9610  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  6.698e+00  6.582e+00   1.018   0.3088  
## age         -1.382e-01  1.073e-01  -1.288   0.1976  
## gmat_tot    -1.904e-03  1.404e-02  -0.136   0.8921  
## gmat_qpc    -7.837e-02  4.611e-02  -1.700   0.0892 .
## gmat_vpc    -1.769e-02  4.575e-02  -0.387   0.6990  
## gmat_tpc     8.354e-02  7.368e-02   1.134   0.2568  
## s_avg       -6.660e-01  1.383e+00  -0.482   0.6300  
## f_avg        1.001e-01  4.067e-01   0.246   0.8056  
## quarter     -6.379e-01  5.105e-01  -1.250   0.2114  
## work_yrs     5.536e-02  1.193e-01   0.464   0.6425  
## frstlang     6.850e-01  6.055e-01   1.131   0.2579  
## salary      -2.357e-06  3.818e-06  -0.617   0.5370  
## satis       -6.460e-04  5.149e-04  -1.255   0.2096  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 232.04  on 197  degrees of freedom
## Residual deviance: 210.58  on 185  degrees of freedom
## AIC: 236.58
## 
## Number of Fisher Scoring iterations: 5
anova(model, test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: sex
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
## NULL                       197     232.04            
## age       1   0.5058       196     231.53  0.47695   
## gmat_tot  1   1.7178       195     229.81  0.18998   
## gmat_qpc  1  10.5691       194     219.24  0.00115 **
## gmat_vpc  1   0.2286       193     219.02  0.63254   
## gmat_tpc  1   1.6816       192     217.33  0.19472   
## s_avg     1   2.0856       191     215.25  0.14869   
## f_avg     1   0.0708       190     215.18  0.79011   
## quarter   1   1.7875       189     213.39  0.18124   
## work_yrs  1   0.1384       188     213.25  0.70991   
## frstlang  1   0.9814       187     212.27  0.32186   
## salary    1   0.0318       186     212.24  0.85853   
## satis     1   1.6549       185     210.58  0.19829   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

A larger p-value here indicates that the model without the variable explains more or less the same amount of variation. In this table the lowest p-value is for gmat_qpc.

library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
# Predict the probability of the recoded sex == 1 class for the held-out
# rows, then threshold at 0.5 to obtain a 0/1 class label.
fitted.results <- predict(
  model,
  newdata = subset(test, select = 1:13),
  type = "response"
)
fitted.results <- ifelse(fitted.results > 0.5, 1, 0)
# Score the labels against the observed sex. The previous version compared
# them with test$salary, which is not the response of this model. Only the
# training rows were recoded, so test$sex is still on the raw 1/2 scale and
# is mapped here the same way (1 -> 0, 2 -> 1) before comparing.
misClasificError <- mean(fitted.results != as.integer(test$sex == 2))
print(paste('Accuracy', 1 - misClasificError))
# NOTE: the accuracy shown in the knitted output after this chunk was
# produced by the old comparison against salary; re-knit to refresh it.
## [1] "Accuracy 0.289473684210526"
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# ROC curve for the held-out rows: predicted probabilities from the logistic
# model against the observed sex column.
p <- predict(
  model,
  newdata = subset(test, select = seq_len(13)),
  type = "response"
)
pr <- prediction(p, test$sex)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
plot(prf)

# Area under the ROC curve, read from the y.values slot of the ROCR
# performance object; the bare name prints it.
auc <- performance(pr, measure = "auc")@y.values[[1]]
auc
## [1] 0.5230415