Reading Dataset

# Load the MBA data set from the working directory and open it in the
# interactive data viewer.
mba.df <- read.csv(file = "MBASSD.csv")
View(mba.df)

Summary and description of given statistics

# Per-column quartiles and mean for every variable in mba.df.
# NOTE(review): the salary median of 999 and the satis maximum of 998 look
# like placeholder codes rather than real measurements (the employed subset
# built later excludes salary 998/999) - confirm against the data dictionary.
summary(mba.df)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0
# describe() from psych extends summary() with sd, trimmed mean, mad, range,
# skew, kurtosis and standard error for each column.
library(psych)
describe(mba.df)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

Attaching the dataset

# Puts the columns of mba.df on the search path.
# NOTE(review): every later call in this file refers to columns through
# mba.df$..., a data = argument, or subset(), so nothing visible here depends
# on attach(); it can mask other objects and is a candidate for removal.
attach(mba.df)

Draw Box Plots / Bar Plots to visualize the distribution of each variable independently

Histogram of age distribution

# Distribution of student ages.
hist(
  mba.df$age,
  main = "Histogram of age distribution",
  xlab = "Age",
  col = "purple"
)

Histogram of gender

# Recode the numeric sex column (1/2) as a labelled factor, then plot the
# number of observations in each level.
gender <- factor(
  mba.df$sex,
  levels = c(1, 2),
  labels = c("Male", "Female")
)
plot(gender, col = "purple", main = "Histogram of sex distribution")

Boxplot of gmat total

# Spread of total GMAT scores.
boxplot(
  mba.df$gmat_tot,
  main = "Boxplot of gmat total",
  xlab = "Gmat total",
  col = "maroon"
)

Boxplot of Work experience

# Spread of years of work experience.
boxplot(
  mba.df$work_yrs,
  main = "Boxplot of work experience",
  xlab = "Work (years)",
  col = "yellow"
)

Histogram of salary distribution

# Distribution of the salary column over the full data set.
# The title previously read "Boxplot of mba salary" although hist() draws a
# histogram; the label now matches the plot type.
hist(
  mba.df$salary,
  main = "Histogram of mba salary",
  xlab = "salary",
  col = "dark blue"
)

Histogram of First language

# Recode the numeric first-language column (1/2) as a labelled factor, then
# plot the number of observations in each level.
flang <- factor(
  mba.df$frstlang,
  levels = c(1, 2),
  labels = c("English", "Others")
)
plot(flang, col = "yellow", main = "Histogram of first language")

Plot for satisfaction level

# Keep only rows whose satisfaction score is at most 7, then plot them.
# The threshold is compared as a number: the previous '7' (a string) forced a
# character comparison, under which a value such as 10 would count as <= "7".
# which() is kept so that rows with a missing satis value are dropped rather
# than returned as all-NA rows.
sl <- mba.df[which(mba.df$satis <= 7), ]
hist(
  sl$satis,
  breaks = 5,
  col = "dark blue",
  xlab = "Satisfaction level",
  main = "Histogram for Satisfaction"
)

Scatterplots to understand how the variables are correlated pair-wise

gmat total v/s age

library(car)    
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# GMAT total (y) against age (x).
scatterplot(
  gmat_tot ~ age,
  data = mba.df,
  main = "scatterplot of gmat total per age",
  xlab = "age",
  ylab = "gmat total"
)

salary v/s age

# Salary (y) against age (x).
# The formula was age ~ salary, which put age on the y axis while the labels
# claimed x = age and y = salary; the formula now matches the labels. The title
# also referred to GMAT and sex, neither of which is plotted here.
scatterplot(
  salary ~ age,
  data = mba.df,
  main = "scatterplot of salary per age",
  xlab = "age",
  ylab = "salary"
)

salary v/s work experience

# Salary (y) against years of work experience (x).
# The formula was work_yrs ~ salary, which put work experience on the y axis
# while the labels claimed x = work experience and y = salary; the formula now
# matches the labels.
scatterplot(
  salary ~ work_yrs,
  data = mba.df,
  main = "scatterplot of salary and work experience",
  xlab = "work experience in years",
  ylab = "salary"
)

salary v/s gmat total

# Salary (y) against GMAT total (x).
scatterplot(
  salary ~ gmat_tot,
  data = mba.df,
  main = "scatterplot of salary and gmat total",
  xlab = "gmat total",
  ylab = "salary"
)

Corrgram

# Correlation diagram of every column: shaded cells below the diagonal,
# pie glyphs above it, variable names on the diagonal.
library(corrgram)
corrgram(
  mba.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of the dataset"
)

Variance-Covariance Matrix

# Variance-covariance matrix of all columns in mba.df, in raw units (so
# large-valued columns such as salary produce the largest entries).
cov(mba.df)
##                    age           sex      gmat_tot      gmat_qpc
## age       1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex      -4.513248e-02  1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00  3.310688e+03  6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00  6.200233e+02  2.210731e+02
## gmat_vpc -2.763643e+00  5.463758e-01  7.260006e+02  3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02  6.839911e+02  1.357997e+02
## s_avg     2.116874e-01  2.096227e-02  2.480257e+00 -1.691233e-01
## f_avg    -3.399348e-02  2.082698e-02  3.154688e+00  5.753854e-01
## quarter  -2.045935e-01 -6.414267e-02 -5.891153e+00  6.001979e-01
## work_yrs  1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang  6.796610e-02  2.138980e-04 -2.499933e+00  6.646346e-01
## salary   -1.183042e+04  1.518264e+03 -1.611600e+05 -3.335823e+04
## satis    -1.763499e+02 -8.780808e+00  1.765263e+03  3.348371e+02
##               gmat_vpc     gmat_tpc         s_avg        f_avg
## age         -2.7636427   -8.8399775    0.21168739  -0.03399348
## sex          0.5463758   -0.0490896    0.02096227   0.02082698
## gmat_tot   726.0006417  683.9910698    2.48025721   3.15468838
## gmat_qpc    38.1482581  135.7996845   -0.16912329   0.57538542
## gmat_vpc   284.2481217  157.4932488    1.31357023   0.67207000
## gmat_tpc   157.4932488  196.6057057    0.62710008   0.58698618
## s_avg        1.3135702    0.6271001    0.14521760   0.11016898
## f_avg        0.6720700    0.5869862    0.11016898   0.27567237
## quarter     -3.2676666   -1.2923719   -0.32237213  -0.26080880
## work_yrs    -3.6181653   -7.8575172    0.15926392  -0.06628700
## frstlang    -2.1145691   -0.4663244   -0.01671372  -0.00626026
## salary   -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis      392.3562739  484.2466779   -4.62884495   2.12532927
##                quarter      work_yrs      frstlang        salary
## age      -2.045935e-01   10.29493864  6.796610e-02 -1.183042e+04
## sex      -6.414267e-02   -0.01580172  2.138980e-04  1.518264e+03
## gmat_tot -5.891153e+00  -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc  6.001979e-01  -11.37186171  6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00   -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00   -7.85751718 -4.663244e-01  3.522750e+03
## s_avg    -3.223721e-01    0.15926392 -1.671372e-02  2.831601e+03
## f_avg    -2.608088e-01   -0.06628700 -6.260260e-03  7.876560e+02
## quarter   1.232119e+00   -0.30866822  3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01   10.44882490 -2.898318e-02  1.486147e+03
## frstlang  3.553381e-02   -0.02898318  1.035266e-01 -1.419586e+03
## salary   -9.296214e+03 1486.14704152 -1.419586e+03  2.596062e+09
## satis    -5.227133e-03 -131.24080907  9.484532e+00 -6.347115e+06
##                  satis
## age      -1.763499e+02
## sex      -8.780808e+00
## gmat_tot  1.765263e+03
## gmat_qpc  3.348371e+02
## gmat_vpc  3.923563e+02
## gmat_tpc  4.842467e+02
## s_avg    -4.628845e+00
## f_avg     2.125329e+00
## quarter  -5.227133e-03
## work_yrs -1.312408e+02
## frstlang  9.484532e+00
## salary   -6.347115e+06
## satis     1.380974e+05
# NOTE(review): var() applied to a data frame yields the same matrix as
# cov(mba.df); the output below repeats the cov() output exactly, so this
# call is redundant.
var(mba.df)
##                    age           sex      gmat_tot      gmat_qpc
## age       1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex      -4.513248e-02  1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00  3.310688e+03  6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00  6.200233e+02  2.210731e+02
## gmat_vpc -2.763643e+00  5.463758e-01  7.260006e+02  3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02  6.839911e+02  1.357997e+02
## s_avg     2.116874e-01  2.096227e-02  2.480257e+00 -1.691233e-01
## f_avg    -3.399348e-02  2.082698e-02  3.154688e+00  5.753854e-01
## quarter  -2.045935e-01 -6.414267e-02 -5.891153e+00  6.001979e-01
## work_yrs  1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang  6.796610e-02  2.138980e-04 -2.499933e+00  6.646346e-01
## salary   -1.183042e+04  1.518264e+03 -1.611600e+05 -3.335823e+04
## satis    -1.763499e+02 -8.780808e+00  1.765263e+03  3.348371e+02
##               gmat_vpc     gmat_tpc         s_avg        f_avg
## age         -2.7636427   -8.8399775    0.21168739  -0.03399348
## sex          0.5463758   -0.0490896    0.02096227   0.02082698
## gmat_tot   726.0006417  683.9910698    2.48025721   3.15468838
## gmat_qpc    38.1482581  135.7996845   -0.16912329   0.57538542
## gmat_vpc   284.2481217  157.4932488    1.31357023   0.67207000
## gmat_tpc   157.4932488  196.6057057    0.62710008   0.58698618
## s_avg        1.3135702    0.6271001    0.14521760   0.11016898
## f_avg        0.6720700    0.5869862    0.11016898   0.27567237
## quarter     -3.2676666   -1.2923719   -0.32237213  -0.26080880
## work_yrs    -3.6181653   -7.8575172    0.15926392  -0.06628700
## frstlang    -2.1145691   -0.4663244   -0.01671372  -0.00626026
## salary   -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis      392.3562739  484.2466779   -4.62884495   2.12532927
##                quarter      work_yrs      frstlang        salary
## age      -2.045935e-01   10.29493864  6.796610e-02 -1.183042e+04
## sex      -6.414267e-02   -0.01580172  2.138980e-04  1.518264e+03
## gmat_tot -5.891153e+00  -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc  6.001979e-01  -11.37186171  6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00   -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00   -7.85751718 -4.663244e-01  3.522750e+03
## s_avg    -3.223721e-01    0.15926392 -1.671372e-02  2.831601e+03
## f_avg    -2.608088e-01   -0.06628700 -6.260260e-03  7.876560e+02
## quarter   1.232119e+00   -0.30866822  3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01   10.44882490 -2.898318e-02  1.486147e+03
## frstlang  3.553381e-02   -0.02898318  1.035266e-01 -1.419586e+03
## salary   -9.296214e+03 1486.14704152 -1.419586e+03  2.596062e+09
## satis    -5.227133e-03 -131.24080907  9.484532e+00 -6.347115e+06
##                  satis
## age      -1.763499e+02
## sex      -8.780808e+00
## gmat_tot  1.765263e+03
## gmat_qpc  3.348371e+02
## gmat_vpc  3.923563e+02
## gmat_tpc  4.842467e+02
## s_avg    -4.628845e+00
## f_avg     2.125329e+00
## quarter  -5.227133e-03
## work_yrs -1.312408e+02
## frstlang  9.484532e+00
## salary   -6.347115e+06
## satis     1.380974e+05

Subset of the dataset consisting of only those people who actually got a job.

# Keep rows whose salary is positive and is neither 998 nor 999, then open the
# result in the interactive data viewer.
emp.df <- subset(
  mba.df,
  salary > 0 & !(salary %in% c(998, 999))
)
View(emp.df)

Analysis of Subset

# Mean age per sex code among the employed subset.
aggregate(age ~ sex, data = emp.df, FUN = mean)
##   sex      age
## 1   1 27.08333
## 2   2 26.06452
# Mean salary at each age among the employed subset.
aggregate(salary ~ age, data = emp.df, FUN = mean)
##    age    salary
## 1   22  85000.00
## 2   23  91651.20
## 3   24 101518.75
## 4   25  99086.96
## 5   26 101665.00
## 6   27 102214.29
## 7   28 103625.00
## 8   29 102083.33
## 9   30 109916.67
## 10  31 100500.00
## 11  32 107300.00
## 12  33 118000.00
## 13  34 105000.00
## 14  39 112000.00
## 15  40 183000.00
# Mean salary per sex code among the employed subset.
aggregate(salary ~ sex, data = emp.df, FUN = mean)
##   sex    salary
## 1   1 104970.97
## 2   2  98524.39
# Mean salary at each GMAT total among the employed subset.
aggregate(salary ~ gmat_tot, data = emp.df, FUN = mean)
##    gmat_tot   salary
## 1       500 158250.0
## 2       520  78256.0
## 3       530  99500.0
## 4       540 104000.0
## 5       550 112236.7
## 6       560  94000.0
## 7       570 103857.1
## 8       580  99875.0
## 9       590  97000.0
## 10      600 107666.7
## 11      610  96200.0
## 12      620 104108.3
## 13      630 105812.5
## 14      640 110000.0
## 15      650 101285.7
## 16      660  92480.0
## 17      670 100642.9
## 18      680 102166.7
## 19      700 122333.3
## 20      710 101250.0
## 21      720  85000.0

T-test

# NOTE(review): this passes the whole data frame, so every column is pooled
# into one sample (df = 1338 in the output below, i.e. 103 rows x 13 columns
# minus 1). The reported mean mixes ages, scores and salaries and is probably
# not a meaningful test - confirm what was intended.
t.test(emp.df)
## 
##  One Sample t-test
## 
## data:  emp.df
## t = 10.492, df = 1338, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  6500.198 9490.068
## sample estimates:
## mean of x 
##  7995.133
# Two-sample t-test of salary between the two sex groups (employed subset).
t.test(salary ~ sex, data = emp.df)
## 
##  Welch Two Sample t-test
## 
## data:  salary by sex
## t = 1.3628, df = 38.115, p-value = 0.1809
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3128.55 16021.72
## sample estimates:
## mean in group 1 mean in group 2 
##       104970.97        98524.39
# Two-sample t-test of salary between the two first-language groups
# (employed subset).
t.test(salary ~ frstlang, data = emp.df)
## 
##  Welch Two Sample t-test
## 
## data:  salary by frstlang
## t = -1.1202, df = 6.0863, p-value = 0.3049
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -59933.62  22202.25
## sample estimates:
## mean in group 1 mean in group 2 
##        101748.6        120614.3

Chisquare test

# Chi-squared test of independence between work experience and satisfaction,
# with a Monte Carlo (simulated) p-value.
chisq.test(
  emp.df$work_yrs,
  emp.df$satis,
  simulate.p.value = TRUE
)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  emp.df$work_yrs and emp.df$satis
## X-squared = 131.13, df = NA, p-value = 0.02399
#Null hypothesis is rejected, work years and satisfaction points are not independent

# Chi-squared test of independence between sex and GMAT total, with a
# Monte Carlo (simulated) p-value.
chisq.test(
  emp.df$sex,
  emp.df$gmat_tot,
  simulate.p.value = TRUE
)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  emp.df$sex and emp.df$gmat_tot
## X-squared = 18.554, df = NA, p-value = 0.5982
# Failed to reject the null hypothesis: no evidence that gender and GMAT total are associated

# Chi-squared test of independence between sex and salary, with a
# Monte Carlo (simulated) p-value.
chisq.test(
  emp.df$sex,
  emp.df$salary,
  simulate.p.value = TRUE
)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  emp.df$sex and emp.df$salary
## X-squared = 52.681, df = NA, p-value = 0.03398
# Null hypothesis rejected: gender and salary are not independent

# Chi-squared test of independence between first language and salary, with a
# Monte Carlo (simulated) p-value.
chisq.test(
  emp.df$frstlang,
  emp.df$salary,
  simulate.p.value = TRUE
)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  emp.df$frstlang and emp.df$salary
## X-squared = 69.847, df = NA, p-value = 0.02349
# Null hypothesis rejected, First language plays a role on salary

Regression Model

# Model 1: linear regression of salary on gmat_tot, sex, s_avg, f_avg and
# frstlang, fitted on the employed subset.
m1 <- lm(
  salary ~ gmat_tot + sex + s_avg + f_avg + frstlang,
  data = emp.df
)
summary(m1)
## 
## Call:
## lm(formula = salary ~ gmat_tot + sex + s_avg + f_avg + frstlang, 
##     data = emp.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -37896  -8763  -1578   5154 104055 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 89490.94   25748.69   3.476 0.000764 ***
## gmat_tot      -27.28      33.62  -0.811 0.419067    
## sex         -7102.35    3678.66  -1.931 0.056441 .  
## s_avg       11401.42    4991.92   2.284 0.024554 *  
## f_avg       -5822.22    3861.27  -1.508 0.134843    
## frstlang    20902.76    6711.49   3.114 0.002423 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16810 on 97 degrees of freedom
## Multiple R-squared:  0.1579, Adjusted R-squared:  0.1145 
## F-statistic: 3.638 on 5 and 97 DF,  p-value: 0.004649
# Model 2: same as model 1 but with work_yrs in place of gmat_tot.
m2 <- lm(
  salary ~ work_yrs + sex + f_avg + s_avg + frstlang,
  data = emp.df
)
summary(m2)
## 
## Call:
## lm(formula = salary ~ work_yrs + sex + f_avg + s_avg + frstlang, 
##     data = emp.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31846  -9078  -1992   5210  83864 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  75605.2    16094.4   4.698 8.68e-06 ***
## work_yrs      2229.5      572.6   3.893 0.000182 ***
## sex          -5829.1     3444.0  -1.693 0.093760 .  
## f_avg        -1172.7     3805.0  -0.308 0.758581    
## s_avg         4562.8     4905.2   0.930 0.354584    
## frstlang     15281.9     6426.9   2.378 0.019377 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15690 on 97 degrees of freedom
## Multiple R-squared:  0.2668, Adjusted R-squared:  0.229 
## F-statistic: 7.058 on 5 and 97 DF,  p-value: 1.15e-05
# Model 3: both work_yrs and gmat_tot together with sex, f_avg, s_avg and
# frstlang.
m3 <- lm(
  salary ~ work_yrs + gmat_tot + sex + f_avg + s_avg + frstlang,
  data = emp.df
)
summary(m3)
## 
## Call:
## lm(formula = salary ~ work_yrs + gmat_tot + sex + f_avg + s_avg + 
##     frstlang, data = emp.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -32652  -8940  -1709   5186  83182 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 82356.30   24206.88   3.402 0.000976 ***
## work_yrs     2201.83     579.92   3.797 0.000257 ***
## gmat_tot      -11.90      31.77  -0.375 0.708712    
## sex         -5886.39    3462.79  -1.700 0.092388 .  
## f_avg       -1153.74    3822.28  -0.302 0.763422    
## s_avg        4851.02    4986.79   0.973 0.333110    
## frstlang    15101.77    6473.46   2.333 0.021743 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15760 on 96 degrees of freedom
## Multiple R-squared:  0.2678, Adjusted R-squared:  0.2221 
## F-statistic: 5.853 on 6 and 96 DF,  p-value: 3.114e-05

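Model 3 has the highest multiple R-squared (0.2678), but Model 2 has the higher adjusted R-squared (0.229 vs 0.2221), so adding gmat_tot does not improve the fit; by adjusted R-squared, Model 2 is the better model.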

Compare the remaining subset of people who did not get a job with those who did.

# Rows whose salary is exactly 0, opened in the interactive data viewer.
notemp.df <- subset(mba.df, subset = salary == 0)
View(notemp.df)

Analysis

# Mean age per sex code among the salary == 0 subset.
aggregate(age ~ sex, data = notemp.df, FUN = mean)
##   sex      age
## 1   1 28.29851
## 2   2 29.13043
# Mean age at each GMAT total among the salary == 0 subset.
aggregate(age ~ gmat_tot, data = notemp.df, FUN = mean)
##    gmat_tot      age
## 1       450 26.00000
## 2       480 43.00000
## 3       510 30.50000
## 4       530 30.33333
## 5       540 29.66667
## 6       550 27.00000
## 7       560 28.50000
## 8       570 27.57143
## 9       580 28.00000
## 10      590 34.66667
## 11      600 24.00000
## 12      610 28.77778
## 13      620 27.25000
## 14      630 32.20000
## 15      640 26.00000
## 16      650 32.00000
## 17      660 26.66667
## 18      670 26.00000
## 19      680 28.00000
## 20      700 32.00000
## 21      710 25.25000
## 22      720 25.00000
## 23      730 25.00000
## 24      740 27.00000
## 25      750 27.00000
## 26      760 32.00000
# Mean age at each number of years of work experience among the salary == 0
# subset.
aggregate(age ~ work_yrs, data = notemp.df, FUN = mean)
##    work_yrs      age
## 1         0 24.00000
## 2         1 25.83333
## 3         2 24.95455
## 4         3 27.00000
## 5         4 27.55556
## 6         5 29.33333
## 7         6 28.50000
## 8         7 32.60000
## 9         8 35.00000
## 10        9 37.00000
## 11       10 31.00000
## 12       11 33.00000
## 13       12 34.50000
## 14       13 42.00000
## 15       16 43.00000
## 16       18 36.00000
## 17       22 45.50000

Chi-Square test

# Chi-squared test of independence between work experience and satisfaction
# in the salary == 0 subset, with a Monte Carlo (simulated) p-value.
chisq.test(
  notemp.df$work_yrs,
  notemp.df$satis,
  simulate.p.value = TRUE
)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  notemp.df$work_yrs and notemp.df$satis
## X-squared = 44.974, df = NA, p-value = 0.5392

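We fail to reject the null hypothesis (p = 0.5392): there is no evidence that work experience and satisfaction are associated in this subset.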

# NOTE(review): this section analyses notemp.df, but this call tests emp.df and
# repeats the sex / GMAT-total test already run on the employed subset -
# possibly a copy-paste slip; confirm whether notemp.df was intended. The
# p-value differs slightly from the earlier run because it is simulated.
chisq.test(emp.df$sex,emp.df$gmat_tot,simulate.p.value = TRUE)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  emp.df$sex and emp.df$gmat_tot
## X-squared = 18.554, df = NA, p-value = 0.6022

We fail to reject the null hypothesis (p = 0.6022): there is no evidence that gender and GMAT total are associated.