# Load the MBA starting-salaries data set from the working directory.
# The file name is a single literal, so wrapping it in paste() added nothing.
model <- read.csv("MBA Starting Salaries Data.csv")
# Open the data in the data viewer, then print per-column summary statistics.
View(model)
summary(model)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0
# psych provides describe(), which prints extended descriptive statistics
# for every column (n, mean, sd, median, trimmed mean, mad, range, skew,
# kurtosis, se).
library(psych)
describe(x = model)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

Visualizing the distribution of each variable independently

Age:

# Histogram of age; breaks = 10 is the suggested number of bins.
hist(
  x = model$age,
  breaks = 10,
  main = "age distribution",
  xlab = "age in years",
  col = "skyblue"
)

Gender distribution

# Histogram of the sex column, which holds the codes 1 and 2.
hist(
  x = model$sex,
  main = "gender distribution",
  xlab = "gender",
  col = "lightblue"
)

1 represents male and 2 represents female

Gmat total score distribution

# Histogram of the total GMAT score; breaks = 20 is the suggested bin count.
hist(
  x = model$gmat_tot,
  breaks = 20,
  main = "gmat score frequency distribution",
  xlab = "total gmat score",
  col = "lightblue"
)

# Histogram of the quantitative GMAT percentile with default binning.
hist(
  x = model$gmat_qpc,
  main = "quantitative distribution",
  xlab = "quantitative GMAT percentile",
  col = "lightblue"
)

Spring mba average

# Histogram of the spring MBA average; breaks = 20 is the suggested bin count.
hist(
  x = model$s_avg,
  breaks = 20,
  main = "spring mba average frequency distribution",
  xlab = "spring mba average",
  col = "lightblue"
)

Fall mba average

# Histogram of the fall MBA average; breaks = 20 is the suggested bin count.
hist(
  x = model$f_avg,
  breaks = 20,
  main = "fall mba average frequency distribution",
  xlab = "fall mba average",
  col = "lightblue"
)

Work experience distribution

# Histogram of years of work experience; breaks = 20 is the suggested bin
# count. The plot title previously misspelled "experience".
hist(
  model$work_yrs,
  xlab = "work experience in years",
  main = "work experience frequency distribution",
  col = "lightblue",
  breaks = 20
)

First language distribution

# Histogram of the first-language column, which holds the codes 1 and 2.
# The plot title previously misspelled "language".
hist(
  model$frstlang,
  xlab = "first language",
  main = "first language frequency distribution",
  col = "lightblue"
)

Salary distribution

# Keep only rows with a real starting salary by dropping the codes 0, 998
# and 999 (presumably non-salary codes; 0 is treated as "no job" later in
# this file). salary is numeric, so it is matched against numbers instead of
# being coerced to text and compared with quoted strings. Rows with a missing
# salary are excluded too, exactly as which() excluded them before.
model1 <- model[!is.na(model$salary) & !(model$salary %in% c(0, 998, 999)), ]
# Distribution of the remaining starting salaries.
hist(
  model1$salary,
  xlab = "starting salary",
  main = "first salary frequency distribution",
  col = "lightblue"
)

Degree of satisfaction with the MBA program (1 = low, 7 = high satisfaction)

# Keep responses on the 1-7 satisfaction scale. The comparison must use the
# number 7: comparing with the string '7' coerces satis to text and orders it
# lexicographically, which only happened to work for the values in this file
# (a value such as 10 would have sorted before "7" and been kept).
new <- model[which(model$satis <= 7), ]
# Distribution of the valid satisfaction scores; breaks = 5 is the suggested
# bin count.
hist(
  new$satis,
  xlab = "degree of satisfaction with MBA program",
  main = "frequency distribution of degree of satisfaction",
  col = "lightblue",
  breaks = 5
)

Scatterplot between salary and age

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Salary against age for the rows kept in model1.
scatterplot(
  salary ~ age,
  data = model1,
  xlab = "age",
  ylab = "salary",
  main = "Scatter plot of salary vs age"
)

Scatterplot between salary and sex

library(car)
# Salary against the sex code for the rows kept in model1.
# spread = FALSE is kept exactly as in the original call. The title
# previously contained a stray run of spaces before "sex".
scatterplot(
  salary ~ sex,
  data = model1,
  spread = FALSE,
  main = "scatterplot of salary and sex",
  ylab = "salary",
  xlab = "sex"
)

Scatterplot of salary and first language

# Salary against the first-language code for the rows kept in model1.
scatterplot(
  salary ~ frstlang,
  data = model1,
  xlab = "first language",
  ylab = "salary",
  main = "scatterplot of first language and salary"
)

scatterplot of gmat total and salary

# Salary against total GMAT score for the rows kept in model1.
scatterplot(
  salary ~ gmat_tot,
  data = model1,
  xlab = "gmat total score",
  ylab = "salary",
  main = "scatterplot of gmat total score and salary"
)

Scatterplot of salary and work experience

# Salary against years of work experience for the rows kept in model1.
scatterplot(
  salary ~ work_yrs,
  data = model1,
  xlab = "work experience in years",
  ylab = "salary",
  main = "scatterplot of work experience and salary"
)

Scatterplot of salary and satisfaction

# Salary against the satisfaction score for the rows kept in model1.
scatterplot(
  salary ~ satis,
  data = model1,
  xlab = "satisfaction",
  ylab = "salary",
  main = "scatterplot of satisfaction and salary"
)

Corrgram of the variables from the data set of the people who got jobs

library(corrgram)
# Correlogram of every column in model1: shaded cells below the diagonal,
# pie glyphs above it, variable names on the diagonal. order = TRUE lets
# corrgram reorder the variables.
corrgram(
  model1,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "MBA starting salary analysis Correlogram"
)

# Pairwise correlation matrix of columns 1 to 13.
# NOTE(review): this uses the unfiltered `model`, which still contains the
# 0/998/999 salary codes that were removed when building model1 - confirm
# that the full data set is intended here.
cor(model[, 1:13])
##                  age          sex    gmat_tot    gmat_qpc    gmat_vpc
## age       1.00000000 -0.028106442 -0.14593840 -0.21616985 -0.04417547
## sex      -0.02810644  1.000000000 -0.05336820 -0.16377435  0.07488782
## gmat_tot -0.14593840 -0.053368202  1.00000000  0.72473781  0.74839187
## gmat_qpc -0.21616985 -0.163774346  0.72473781  1.00000000  0.15218014
## gmat_vpc -0.04417547  0.074887816  0.74839187  0.15218014  1.00000000
## gmat_tpc -0.16990307 -0.008090213  0.84779965  0.65137754  0.66621604
## s_avg     0.14970402  0.127115144  0.11311702 -0.02984873  0.20445365
## f_avg    -0.01744806  0.091663891  0.10442409  0.07370455  0.07592225
## quarter  -0.04967221 -0.133533171 -0.09223903  0.03636638 -0.17460736
## work_yrs  0.85829810 -0.011296374 -0.18235434 -0.23660827 -0.06639049
## frstlang  0.05692649  0.001536205 -0.13503402  0.13892774 -0.38980465
## salary   -0.06257355  0.068858628 -0.05497188 -0.04403293 -0.00613934
## satis    -0.12788825 -0.054602220  0.08255770  0.06060004  0.06262375
##              gmat_tpc       s_avg       f_avg       quarter     work_yrs
## age      -0.169903066  0.14970402 -0.01744806 -4.967221e-02  0.858298096
## sex      -0.008090213  0.12711514  0.09166389 -1.335332e-01 -0.011296374
## gmat_tot  0.847799647  0.11311702  0.10442409 -9.223903e-02 -0.182354339
## gmat_qpc  0.651377538 -0.02984873  0.07370455  3.636638e-02 -0.236608270
## gmat_vpc  0.666216035  0.20445365  0.07592225 -1.746074e-01 -0.066390490
## gmat_tpc  1.000000000  0.11736245  0.07973210 -8.303535e-02 -0.173361859
## s_avg     0.117362449  1.00000000  0.55062139 -7.621166e-01  0.129292714
## f_avg     0.079732099  0.55062139  1.00000000 -4.475064e-01 -0.039056921
## quarter  -0.083035351 -0.76211664 -0.44750637  1.000000e+00 -0.086026406
## work_yrs -0.173361859  0.12929271 -0.03905692 -8.602641e-02  1.000000000
## frstlang -0.103362747 -0.13631308 -0.03705695  9.949226e-02 -0.027866747
## salary    0.004930901  0.14583606  0.02944303 -1.643699e-01  0.009023407
## satis     0.092934266 -0.03268664  0.01089273 -1.267198e-05 -0.109255286
##              frstlang       salary         satis
## age       0.056926486 -0.062573547 -1.278882e-01
## sex       0.001536205  0.068858628 -5.460222e-02
## gmat_tot -0.135034017 -0.054971880  8.255770e-02
## gmat_qpc  0.138927742 -0.044032933  6.060004e-02
## gmat_vpc -0.389804653 -0.006139340  6.262375e-02
## gmat_tpc -0.103362747  0.004930901  9.293427e-02
## s_avg    -0.136313080  0.145836062 -3.268664e-02
## f_avg    -0.037056954  0.029443027  1.089273e-02
## quarter   0.099492259 -0.164369865 -1.267198e-05
## work_yrs -0.027866747  0.009023407 -1.092553e-01
## frstlang  1.000000000 -0.086592096  7.932264e-02
## salary   -0.086592096  1.000000000 -3.352171e-01
## satis     0.079322637 -0.335217114  1.000000e+00

Correlation of salary with other variables

# Correlation of salary with every column of model1 (one row, one entry per
# column).
cor(x = model1$salary, y = model1)
##            age        sex    gmat_tot  gmat_qpc   gmat_vpc   gmat_tpc
## [1,] 0.4996428 -0.1662887 -0.09067141 0.0141413 -0.1374323 -0.1320178
##          s_avg     f_avg    quarter  work_yrs  frstlang salary      satis
## [1,] 0.1017317 -0.106039 -0.1284853 0.4546663 0.2670195      1 -0.0400506

Contingency tables showing the effect of various factors on salary

 # Two-way frequency table: one row per distinct salary, one column per sex
 # code. Stored because it is passed to chisq.test() further down.
 mytable <- xtabs(~ salary + sex, data = model1)
 mytable
##         sex
## salary    1  2
##   64000   0  1
##   77000   1  0
##   78256   0  1
##   82000   0  1
##   85000   1  3
##   86000   0  2
##   88000   0  1
##   88500   1  0
##   90000   3  0
##   92000   2  1
##   93000   2  1
##   95000   4  3
##   96000   3  1
##   96500   1  0
##   97000   2  0
##   98000   6  4
##   99000   0  1
##   100000  4  5
##   100400  1  0
##   101000  0  2
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  2  0
##   105000 11  0
##   106000  2  1
##   107000  1  0
##   107300  1  0
##   107500  1  0
##   108000  2  0
##   110000  0  1
##   112000  3  0
##   115000  5  0
##   118000  1  0
##   120000  3  1
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1
 # Three-way frequency table: salary by years of work experience, printed as
 # one slice per first-language code.
 mytable1 <- xtabs(~ salary + work_yrs + frstlang, data = model1)
 mytable1
## , , frstlang = 1
## 
##         work_yrs
## salary   0 1 2 3 4 5 6 7 8 10 15 16
##   64000  0 0 1 0 0 0 0 0 0  0  0  0
##   77000  0 0 1 0 0 0 0 0 0  0  0  0
##   78256  0 1 0 0 0 0 0 0 0  0  0  0
##   82000  0 1 0 0 0 0 0 0 0  0  0  0
##   85000  0 1 2 1 0 0 0 0 0  0  0  0
##   86000  0 0 1 1 0 0 0 0 0  0  0  0
##   88000  0 0 0 1 0 0 0 0 0  0  0  0
##   88500  0 0 0 1 0 0 0 0 0  0  0  0
##   90000  0 0 2 0 0 1 0 0 0  0  0  0
##   92000  0 0 3 0 0 0 0 0 0  0  0  0
##   93000  0 0 0 0 1 1 0 0 1  0  0  0
##   95000  1 1 2 2 0 1 0 0 0  0  0  0
##   96000  0 1 2 0 1 0 0 0 0  0  0  0
##   96500  0 0 1 0 0 0 0 0 0  0  0  0
##   97000  0 0 0 1 1 0 0 0 0  0  0  0
##   98000  0 0 6 0 1 0 0 1 0  0  0  0
##   99000  0 0 0 0 0 0 0 0 0  0  0  0
##   100000 0 0 6 1 1 0 1 0 0  0  0  0
##   100400 0 0 0 1 0 0 0 0 0  0  0  0
##   101000 0 0 2 0 0 0 0 0 0  0  0  0
##   101100 0 0 0 0 0 0 0 0 1  0  0  0
##   101600 0 0 0 1 0 0 0 0 0  0  0  0
##   102500 0 0 0 0 0 0 1 0 0  0  0  0
##   103000 0 0 0 1 0 0 0 0 0  0  0  0
##   104000 0 0 0 0 1 0 0 0 0  0  0  0
##   105000 0 0 4 4 0 1 1 0 0  0  0  1
##   106000 0 0 0 0 0 0 2 0 1  0  0  0
##   107000 0 0 1 0 0 0 0 0 0  0  0  0
##   107300 0 0 0 0 0 0 0 0 0  0  0  0
##   107500 0 0 0 1 0 0 0 0 0  0  0  0
##   108000 0 0 0 1 1 0 0 0 0  0  0  0
##   110000 0 0 0 0 0 0 1 0 0  0  0  0
##   112000 0 0 1 0 0 0 1 0 0  0  0  1
##   115000 0 2 0 1 2 0 0 0 0  0  0  0
##   118000 0 0 0 0 0 0 0 0 0  0  0  0
##   120000 0 0 0 1 0 2 0 0 1  0  0  0
##   126710 0 0 0 1 0 0 0 0 0  0  0  0
##   130000 0 0 0 0 1 0 0 0 0  0  0  0
##   145800 0 0 1 0 0 0 0 0 0  0  0  0
##   146000 0 0 0 0 0 0 0 0 0  0  1  0
##   162000 0 1 0 0 0 0 0 0 0  0  0  0
##   220000 0 0 0 0 0 0 0 0 0  0  0  0
## 
## , , frstlang = 2
## 
##         work_yrs
## salary   0 1 2 3 4 5 6 7 8 10 15 16
##   64000  0 0 0 0 0 0 0 0 0  0  0  0
##   77000  0 0 0 0 0 0 0 0 0  0  0  0
##   78256  0 0 0 0 0 0 0 0 0  0  0  0
##   82000  0 0 0 0 0 0 0 0 0  0  0  0
##   85000  0 0 0 0 0 0 0 0 0  0  0  0
##   86000  0 0 0 0 0 0 0 0 0  0  0  0
##   88000  0 0 0 0 0 0 0 0 0  0  0  0
##   88500  0 0 0 0 0 0 0 0 0  0  0  0
##   90000  0 0 0 0 0 0 0 0 0  0  0  0
##   92000  0 0 0 0 0 0 0 0 0  0  0  0
##   93000  0 0 0 0 0 0 0 0 0  0  0  0
##   95000  0 0 0 0 0 0 0 0 0  0  0  0
##   96000  0 0 0 0 0 0 0 0 0  0  0  0
##   96500  0 0 0 0 0 0 0 0 0  0  0  0
##   97000  0 0 0 0 0 0 0 0 0  0  0  0
##   98000  0 0 1 1 0 0 0 0 0  0  0  0
##   99000  0 0 0 0 0 1 0 0 0  0  0  0
##   100000 0 0 0 0 0 0 0 0 0  0  0  0
##   100400 0 0 0 0 0 0 0 0 0  0  0  0
##   101000 0 0 0 0 0 0 0 0 0  0  0  0
##   101100 0 0 0 0 0 0 0 0 0  0  0  0
##   101600 0 0 0 0 0 0 0 0 0  0  0  0
##   102500 0 0 0 0 0 0 0 0 0  0  0  0
##   103000 0 0 0 0 0 0 0 0 0  0  0  0
##   104000 0 0 0 0 1 0 0 0 0  0  0  0
##   105000 0 0 0 0 0 0 0 0 0  0  0  0
##   106000 0 0 0 0 0 0 0 0 0  0  0  0
##   107000 0 0 0 0 0 0 0 0 0  0  0  0
##   107300 0 0 1 0 0 0 0 0 0  0  0  0
##   107500 0 0 0 0 0 0 0 0 0  0  0  0
##   108000 0 0 0 0 0 0 0 0 0  0  0  0
##   110000 0 0 0 0 0 0 0 0 0  0  0  0
##   112000 0 0 0 0 0 0 0 0 0  0  0  0
##   115000 0 0 0 0 0 0 0 0 0  0  0  0
##   118000 0 0 0 0 0 0 0 0 0  1  0  0
##   120000 0 0 0 0 0 0 0 0 0  0  0  0
##   126710 0 0 0 0 0 0 0 0 0  0  0  0
##   130000 0 0 0 0 0 0 0 0 0  0  0  0
##   145800 0 0 0 0 0 0 0 0 0  0  0  0
##   146000 0 0 0 0 0 0 0 0 0  0  0  0
##   162000 0 0 0 0 0 0 0 0 0  0  0  0
##   220000 0 0 0 0 0 0 0 0 0  0  1  0

t-test and chi-square test

# Welch two-sample t-test of mean salary between the two sex groups.
# The previous call, t.test(model1$salary, model1$sex), compared the mean
# salary with the mean of the 1/2 sex codes, which says nothing about a
# salary difference between men and women.
# NOTE(review): the recorded output below was produced by the previous call
# and must be regenerated, and the conclusion re-checked, after this change.
t.test(salary ~ sex, data = model1)
## 
##  Welch Two Sample t-test
## 
## data:  model1$salary and model1$sex
## t = 58.517, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   99537.17 106521.71
## sample estimates:
##    mean of x    mean of y 
## 1.030307e+05 1.300971e+00

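The recorded output above reports a p-value < 0.05, but that test compares the mean salary (about 103,031) with the mean of the numeric sex codes (about 1.30), as its `data:` line and sample estimates show. It therefore does not establish a difference between the salaries of men and women; that requires a two-group comparison such as `t.test(salary ~ sex, data = model1)`.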

# Pearson chi-squared test of independence on the salary-by-sex table.
chisq.test(x = mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable
## X-squared = 52.681, df = 41, p-value = 0.1045

Regression models

# Model 1: salary regressed on age, sex, the four GMAT measures, first
# language and quarter, fitted to model1.
m1 <- lm(
  formula = salary ~ age + sex + gmat_tot + gmat_tpc + gmat_qpc + gmat_vpc +
    frstlang + quarter,
  data = model1
)
summary(object = m1)
## 
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + gmat_tpc + gmat_qpc + 
##     gmat_vpc + frstlang + quarter, data = model1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -25167  -7550  -1109   5163  71055 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 50739.508  44276.926   1.146   0.2547    
## age          2458.718    525.702   4.677 9.73e-06 ***
## sex         -3456.252   3458.815  -0.999   0.3202    
## gmat_tot        6.807    159.950   0.043   0.9661    
## gmat_tpc    -1429.300    693.662  -2.061   0.0421 *  
## gmat_qpc      796.902    474.955   1.678   0.0967 .  
## gmat_vpc      533.123    473.264   1.126   0.2628    
## frstlang     5868.789   6789.285   0.864   0.3896    
## quarter     -1820.645   1392.015  -1.308   0.1941    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15200 on 94 degrees of freedom
## Multiple R-squared:  0.3333, Adjusted R-squared:  0.2766 
## F-statistic: 5.875 on 8 and 94 DF,  p-value: 4.368e-06

The p-values of age and overall GMAT percentile (gmat_tpc) are less than 0.05, so these are the two predictors significantly associated with salary in this model.

# Model 2: salary regressed on the spring and fall averages, years of work
# experience and satisfaction, fitted to model1.
m2 <- lm(
  formula = salary ~ s_avg + f_avg + work_yrs + satis,
  data = model1
)
summary(object = m2)
## 
## Call:
## lm(formula = salary ~ s_avg + f_avg + work_yrs + satis, data = model1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33329  -7748   -853   3885  87689 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 101048.7    20095.5   5.028 2.23e-06 ***
## s_avg         1588.0     4987.7   0.318    0.751    
## f_avg        -1186.1     3885.5  -0.305    0.761    
## work_yrs      2649.6      572.3   4.630 1.12e-05 ***
## satis        -1531.7     2075.3  -0.738    0.462    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16180 on 98 degrees of freedom
## Multiple R-squared:  0.2125, Adjusted R-squared:  0.1804 
## F-statistic: 6.611 on 4 and 98 DF,  p-value: 9.407e-05

The p-value of work experience is less than 0.05, so salary is significantly associated with it in this model.

# Model 3: salary regressed on work experience, age, overall GMAT percentile
# and sex, fitted to model1.
m3 <- lm(
  formula = salary ~ work_yrs + age + gmat_tpc + sex,
  data = model1
)
summary(object = m3)
## 
## Call:
## lm(formula = salary ~ work_yrs + age + gmat_tpc + sex, data = model1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31132  -8216  -1918   5863  80378 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  57425.9    27148.6   2.115   0.0369 *
## work_yrs       371.1     1090.4   0.340   0.7343  
## age           2300.2     1005.4   2.288   0.0243 *
## gmat_tpc      -143.1      141.9  -1.008   0.3158  
## sex          -4039.5     3400.0  -1.188   0.2377  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15600 on 98 degrees of freedom
## Multiple R-squared:  0.2678, Adjusted R-squared:  0.2379 
## F-statistic: 8.962 on 4 and 98 DF,  p-value: 3.282e-06

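The first model is the best one: it has the highest adjusted R-squared (0.2766, versus 0.1804 for the second model and 0.2379 for the third).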

Comparing people with job and without job

# Rows recorded with a salary of 0 (the "without job" group). salary is
# numeric, so it is compared with the number 0. The former != '998' and
# != '999' conditions were redundant once salary == 0 is required.
nojob <- model[which(model$salary == 0), ]
# Trim model1 to as many rows as nojob so that the two-vector chisq.test()
# calls below receive vectors of equal length. This replaces the hard-coded
# 1:90.
# NOTE(review): this overwrites model1 and pairs unrelated rows of the two
# groups by position. A table of job status against each variable would be
# the usual design - confirm the intended test.
model1 <- model1[seq_len(nrow(nojob)), ]
# Chi-squared test on the cross-tabulation of age in nojob against age in the
# trimmed model1 (rows paired by position).
chisq.test(x = nojob$age, y = model1$age)
## Warning in chisq.test(nojob$age, model1$age): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob$age and model1$age
## X-squared = 229.27, df = 252, p-value = 0.8449
# Chi-squared test on the cross-tabulation of the sex code in nojob against
# the sex code in the trimmed model1 (rows paired by position).
chisq.test(x = nojob$sex, y = model1$sex)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  nojob$sex and model1$sex
## X-squared = 0.11711, df = 1, p-value = 0.7322
# Chi-squared test on the cross-tabulation of work experience in nojob
# against work experience in the trimmed model1 (rows paired by position).
chisq.test(x = nojob$work_yrs, y = model1$work_yrs)
## Warning in chisq.test(nojob$work_yrs, model1$work_yrs): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob$work_yrs and model1$work_yrs
## X-squared = 117.66, df = 176, p-value = 0.9998
# Chi-squared test on the cross-tabulation of quarter in nojob against
# quarter in the trimmed model1 (rows paired by position).
chisq.test(x = nojob$quarter, y = model1$quarter)
## Warning in chisq.test(nojob$quarter, model1$quarter): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob$quarter and model1$quarter
## X-squared = 110.98, df = 9, p-value < 2.2e-16
# Chi-squared test on the cross-tabulation of total GMAT score in nojob
# against total GMAT score in the trimmed model1 (rows paired by position).
chisq.test(x = nojob$gmat_tot, y = model1$gmat_tot)
## Warning in chisq.test(nojob$gmat_tot, model1$gmat_tot): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob$gmat_tot and model1$gmat_tot
## X-squared = 496.72, df = 500, p-value = 0.533
# Chi-squared test on the cross-tabulation of the quantitative GMAT
# percentile in nojob against the same column in the trimmed model1 (rows
# paired by position).
chisq.test(x = nojob$gmat_qpc, y = model1$gmat_qpc)
## Warning in chisq.test(nojob$gmat_qpc, model1$gmat_qpc): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob$gmat_qpc and model1$gmat_qpc
## X-squared = 1305.3, df = 1221, p-value = 0.04635
# Chi-squared test on the cross-tabulation of the overall GMAT percentile in
# nojob against the same column in the trimmed model1 (rows paired by
# position).
chisq.test(x = nojob$gmat_tpc, y = model1$gmat_tpc)
## Warning in chisq.test(nojob$gmat_tpc, model1$gmat_tpc): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob$gmat_tpc and model1$gmat_tpc
## X-squared = 776.54, df = 784, p-value = 0.5683
# Chi-squared test on the cross-tabulation of the verbal GMAT percentile in
# nojob against the same column in the trimmed model1 (rows paired by
# position).
chisq.test(x = nojob$gmat_vpc, y = model1$gmat_vpc)
## Warning in chisq.test(nojob$gmat_vpc, model1$gmat_vpc): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob$gmat_vpc and model1$gmat_vpc
## X-squared = 716.3, df = 625, p-value = 0.006476
# Chi-squared test on the cross-tabulation of the first-language code in
# nojob against the same column in the trimmed model1 (rows paired by
# position).
chisq.test(x = nojob$frstlang, y = model1$frstlang)
## Warning in chisq.test(nojob$frstlang, model1$frstlang): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  nojob$frstlang and model1$frstlang
## X-squared = 0.0080703, df = 1, p-value = 0.9284
# Chi-squared test on the cross-tabulation of the satisfaction score in nojob
# against the same column in the trimmed model1 (rows paired by position).
chisq.test(x = nojob$satis, y = model1$satis)
## Warning in chisq.test(nojob$satis, model1$satis): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  nojob$satis and model1$satis
## X-squared = 7.502, df = 12, p-value = 0.8227

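The factors that appear relevant are quartile (p < 2.2e-16), GMAT quantitative percentile (p = 0.046) and GMAT verbal percentile (p = 0.0065). Note that R warned that the chi-squared approximation may be incorrect for all three of these tests, so the results should be treated with caution.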