TASK 2a

Read data

salary<-read.csv(file="MBA Starting Salaries Data.csv")

summary statistics

summary(salary)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0

standard deviation of 1)age

sd(salary$age)
## [1] 3.710666

2)salary

sd(salary$salary)
## [1] 50951.56

3)gmat_qpc,gmat_vpc,gmat_tpc,gmat_tot

sd(salary$gmat_qpc)
## [1] 14.86853
sd(salary$gmat_vpc)
## [1] 16.85966
sd(salary$gmat_tpc)
## [1] 14.02162
sd(salary$gmat_tot)
## [1] 57.53858

boxplot of Age

boxplot(salary$age, main="Age",
              col=c("yellow"),horizontal=TRUE,
              xlab="Age (Years)" )

boxplot of gmat_qpc

boxplot(salary$gmat_qpc, main="gmat_qpc",
              col=c("green"),horizontal=TRUE,
              xlab="percentile" )

boxplot of gmat_tpc

boxplot(salary$gmat_tpc, main="gmat_tpc",
              col=c("blue"),horizontal=TRUE,
              xlab="percentile" )

boxplot of gmat_vpc

boxplot(salary$gmat_vpc, main="gmat_vpc",
              col=c("grey"),horizontal=TRUE,
              xlab="percentile" )

boxplot of gmat_tot

boxplot(salary$gmat_tot, main="gmat_tot",
              col=c("pink"),horizontal=TRUE,
              xlab="gmat_tot" )

boxplot of spring MBA avg

boxplot(salary$s_avg, main="spring MBA avg",
              col=c("red"),horizontal=TRUE,
              xlab="average" )

boxplot of fail MBA avg

boxplot(salary$f_avg, main="fail MBA avg",
              col=c("light blue"),horizontal=TRUE,
              xlab="average" )

boxplot of starting salary

boxplot(salary$salary, main="salary",
              col=c("yellow"),horizontal=TRUE,
              xlab="amount" )

boxplot of work years

boxplot(salary$work_yrs, main="work years",
              col=c("maroon"),horizontal=TRUE,
              xlab="Years" )

scatterplotmatrix of gmat_tpc,salary,age,

library(car)
scatterplotMatrix(formula = ~ gmat_tpc + salary + age, cex=0.6,
                       data=salary, diagonal="histogram")

scatterplotmatrix of f_avg,s_avg,salary,age

library(car)
scatterplotMatrix(formula = ~ s_avg + f_avg + age + salary, cex=0.5,
                       data=salary, diagonal="histogram")

corrgram of intercorrelations

library(corrgram)
corrgram(salary, order=FALSE, 
         lower.panel=panel.shade,
         upper.panel=panel.pie, 
         diag.panel=panel.minmax,
         text.panel=panel.txt,
         main="Corrgram of salary intercorrelations")

varience matrix of given dataset

var(salary)
##                    age           sex      gmat_tot      gmat_qpc
## age       1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex      -4.513248e-02  1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00  3.310688e+03  6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00  6.200233e+02  2.210731e+02
## gmat_vpc -2.763643e+00  5.463758e-01  7.260006e+02  3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02  6.839911e+02  1.357997e+02
## s_avg     2.116874e-01  2.096227e-02  2.480257e+00 -1.691233e-01
## f_avg    -3.399348e-02  2.082698e-02  3.154688e+00  5.753854e-01
## quarter  -2.045935e-01 -6.414267e-02 -5.891153e+00  6.001979e-01
## work_yrs  1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang  6.796610e-02  2.138980e-04 -2.499933e+00  6.646346e-01
## salary   -1.183042e+04  1.518264e+03 -1.611600e+05 -3.335823e+04
## satis    -1.763499e+02 -8.780808e+00  1.765263e+03  3.348371e+02
##               gmat_vpc     gmat_tpc         s_avg        f_avg
## age         -2.7636427   -8.8399775    0.21168739  -0.03399348
## sex          0.5463758   -0.0490896    0.02096227   0.02082698
## gmat_tot   726.0006417  683.9910698    2.48025721   3.15468838
## gmat_qpc    38.1482581  135.7996845   -0.16912329   0.57538542
## gmat_vpc   284.2481217  157.4932488    1.31357023   0.67207000
## gmat_tpc   157.4932488  196.6057057    0.62710008   0.58698618
## s_avg        1.3135702    0.6271001    0.14521760   0.11016898
## f_avg        0.6720700    0.5869862    0.11016898   0.27567237
## quarter     -3.2676666   -1.2923719   -0.32237213  -0.26080880
## work_yrs    -3.6181653   -7.8575172    0.15926392  -0.06628700
## frstlang    -2.1145691   -0.4663244   -0.01671372  -0.00626026
## salary   -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis      392.3562739  484.2466779   -4.62884495   2.12532927
##                quarter      work_yrs      frstlang        salary
## age      -2.045935e-01   10.29493864  6.796610e-02 -1.183042e+04
## sex      -6.414267e-02   -0.01580172  2.138980e-04  1.518264e+03
## gmat_tot -5.891153e+00  -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc  6.001979e-01  -11.37186171  6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00   -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00   -7.85751718 -4.663244e-01  3.522750e+03
## s_avg    -3.223721e-01    0.15926392 -1.671372e-02  2.831601e+03
## f_avg    -2.608088e-01   -0.06628700 -6.260260e-03  7.876560e+02
## quarter   1.232119e+00   -0.30866822  3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01   10.44882490 -2.898318e-02  1.486147e+03
## frstlang  3.553381e-02   -0.02898318  1.035266e-01 -1.419586e+03
## salary   -9.296214e+03 1486.14704152 -1.419586e+03  2.596062e+09
## satis    -5.227133e-03 -131.24080907  9.484532e+00 -6.347115e+06
##                  satis
## age      -1.763499e+02
## sex      -8.780808e+00
## gmat_tot  1.765263e+03
## gmat_qpc  3.348371e+02
## gmat_vpc  3.923563e+02
## gmat_tpc  4.842467e+02
## s_avg    -4.628845e+00
## f_avg     2.125329e+00
## quarter  -5.227133e-03
## work_yrs -1.312408e+02
## frstlang  9.484532e+00
## salary   -6.347115e+06
## satis     1.380974e+05

covarience matrix of given dataset

cov(salary)
##                    age           sex      gmat_tot      gmat_qpc
## age       1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex      -4.513248e-02  1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00  3.310688e+03  6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00  6.200233e+02  2.210731e+02
## gmat_vpc -2.763643e+00  5.463758e-01  7.260006e+02  3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02  6.839911e+02  1.357997e+02
## s_avg     2.116874e-01  2.096227e-02  2.480257e+00 -1.691233e-01
## f_avg    -3.399348e-02  2.082698e-02  3.154688e+00  5.753854e-01
## quarter  -2.045935e-01 -6.414267e-02 -5.891153e+00  6.001979e-01
## work_yrs  1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang  6.796610e-02  2.138980e-04 -2.499933e+00  6.646346e-01
## salary   -1.183042e+04  1.518264e+03 -1.611600e+05 -3.335823e+04
## satis    -1.763499e+02 -8.780808e+00  1.765263e+03  3.348371e+02
##               gmat_vpc     gmat_tpc         s_avg        f_avg
## age         -2.7636427   -8.8399775    0.21168739  -0.03399348
## sex          0.5463758   -0.0490896    0.02096227   0.02082698
## gmat_tot   726.0006417  683.9910698    2.48025721   3.15468838
## gmat_qpc    38.1482581  135.7996845   -0.16912329   0.57538542
## gmat_vpc   284.2481217  157.4932488    1.31357023   0.67207000
## gmat_tpc   157.4932488  196.6057057    0.62710008   0.58698618
## s_avg        1.3135702    0.6271001    0.14521760   0.11016898
## f_avg        0.6720700    0.5869862    0.11016898   0.27567237
## quarter     -3.2676666   -1.2923719   -0.32237213  -0.26080880
## work_yrs    -3.6181653   -7.8575172    0.15926392  -0.06628700
## frstlang    -2.1145691   -0.4663244   -0.01671372  -0.00626026
## salary   -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis      392.3562739  484.2466779   -4.62884495   2.12532927
##                quarter      work_yrs      frstlang        salary
## age      -2.045935e-01   10.29493864  6.796610e-02 -1.183042e+04
## sex      -6.414267e-02   -0.01580172  2.138980e-04  1.518264e+03
## gmat_tot -5.891153e+00  -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc  6.001979e-01  -11.37186171  6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00   -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00   -7.85751718 -4.663244e-01  3.522750e+03
## s_avg    -3.223721e-01    0.15926392 -1.671372e-02  2.831601e+03
## f_avg    -2.608088e-01   -0.06628700 -6.260260e-03  7.876560e+02
## quarter   1.232119e+00   -0.30866822  3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01   10.44882490 -2.898318e-02  1.486147e+03
## frstlang  3.553381e-02   -0.02898318  1.035266e-01 -1.419586e+03
## salary   -9.296214e+03 1486.14704152 -1.419586e+03  2.596062e+09
## satis    -5.227133e-03 -131.24080907  9.484532e+00 -6.347115e+06
##                  satis
## age      -1.763499e+02
## sex      -8.780808e+00
## gmat_tot  1.765263e+03
## gmat_qpc  3.348371e+02
## gmat_vpc  3.923563e+02
## gmat_tpc  4.842467e+02
## s_avg    -4.628845e+00
## f_avg     2.125329e+00
## quarter  -5.227133e-03
## work_yrs -1.312408e+02
## frstlang  9.484532e+00
## salary   -6.347115e+06
## satis     1.380974e+05

TASK 2b

subset of people who got job

employed <- subset(salary, salary>998, select = age:satis)
View(employed)

contengency-tables 1)age and salary they get

aggregate(age~salary,data=employed,mean)
##    salary      age
## 1     999 27.48571
## 2   64000 24.00000
## 3   77000 23.00000
## 4   78256 23.00000
## 5   82000 26.00000
## 6   85000 25.00000
## 7   86000 25.50000
## 8   88000 25.00000
## 9   88500 27.00000
## 10  90000 25.66667
## 11  92000 25.66667
## 12  93000 28.00000
## 13  95000 25.42857
## 14  96000 25.25000
## 15  96500 24.00000
## 16  97000 27.50000
## 17  98000 25.50000
## 18  99000 28.00000
## 19 100000 25.33333
## 20 100400 29.00000
## 21 101000 24.50000
## 22 101100 29.00000
## 23 101600 26.00000
## 24 102500 30.00000
## 25 103000 27.00000
## 26 104000 29.50000
## 27 105000 27.00000
## 28 106000 29.66667
## 29 107000 26.00000
## 30 107300 32.00000
## 31 107500 27.00000
## 32 108000 26.50000
## 33 110000 28.00000
## 34 112000 30.66667
## 35 115000 26.00000
## 36 118000 33.00000
## 37 120000 28.75000
## 38 126710 26.00000
## 39 130000 26.00000
## 40 145800 24.00000
## 41 146000 40.00000
## 42 162000 25.00000
## 43 220000 40.00000

2)gender and salary

aggregate(sex==1~salary,data=employed,mean)
##    salary  sex == 1
## 1     999 0.8571429
## 2   64000 0.0000000
## 3   77000 1.0000000
## 4   78256 0.0000000
## 5   82000 0.0000000
## 6   85000 0.2500000
## 7   86000 0.0000000
## 8   88000 0.0000000
## 9   88500 1.0000000
## 10  90000 1.0000000
## 11  92000 0.6666667
## 12  93000 0.6666667
## 13  95000 0.5714286
## 14  96000 0.7500000
## 15  96500 1.0000000
## 16  97000 1.0000000
## 17  98000 0.6000000
## 18  99000 0.0000000
## 19 100000 0.4444444
## 20 100400 1.0000000
## 21 101000 0.0000000
## 22 101100 1.0000000
## 23 101600 1.0000000
## 24 102500 1.0000000
## 25 103000 1.0000000
## 26 104000 1.0000000
## 27 105000 1.0000000
## 28 106000 0.6666667
## 29 107000 1.0000000
## 30 107300 1.0000000
## 31 107500 1.0000000
## 32 108000 1.0000000
## 33 110000 0.0000000
## 34 112000 1.0000000
## 35 115000 1.0000000
## 36 118000 1.0000000
## 37 120000 0.7500000
## 38 126710 1.0000000
## 39 130000 1.0000000
## 40 145800 1.0000000
## 41 146000 1.0000000
## 42 162000 1.0000000
## 43 220000 0.0000000

3)GMAT percentile and salary

aggregate(gmat_tpc~salary,data=employed,mean)
##    salary gmat_tpc
## 1     999 84.34286
## 2   64000 71.00000
## 3   77000 95.00000
## 4   78256 58.00000
## 5   82000 95.00000
## 6   85000 95.00000
## 7   86000 93.00000
## 8   88000 93.00000
## 9   88500 87.00000
## 10  90000 87.33333
## 11  92000 93.33333
## 12  93000 79.66667
## 13  95000 81.00000
## 14  96000 82.25000
## 15  96500 52.00000
## 16  97000 83.00000
## 17  98000 83.40000
## 18  99000 79.00000
## 19 100000 85.00000
## 20 100400 89.00000
## 21 101000 86.00000
## 22 101100 95.00000
## 23 101600 91.00000
## 24 102500 96.00000
## 25 103000 88.00000
## 26 104000 67.00000
## 27 105000 80.18182
## 28 106000 93.00000
## 29 107000 83.00000
## 30 107300 94.00000
## 31 107500 91.00000
## 32 108000 79.00000
## 33 110000 91.00000
## 34 112000 91.66667
## 35 115000 82.80000
## 36 118000 87.00000
## 37 120000 90.00000
## 38 126710 69.00000
## 39 130000 93.00000
## 40 145800 87.00000
## 41 146000 91.00000
## 42 162000 98.00000
## 43 220000 51.00000

chi-square tests 1)salary and gender

chisq.test(salary,sex)
## Warning in chisq.test(salary, sex): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  salary
## X-squared = 8451300, df = 3276, p-value < 2.2e-16

2)salary and Age

chisq.test(salary,age)
## Warning in chisq.test(salary, age): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  salary
## X-squared = 8451300, df = 3276, p-value < 2.2e-16

3)salary and gmat_tot

chisq.test(salary,gmat_tot)
## Warning in chisq.test(salary, gmat_tot): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  salary
## X-squared = 8451300, df = 3276, p-value < 2.2e-16

4)salary and work years

chisq.test(salary,work_yrs)
## Warning in chisq.test(salary, work_yrs): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  salary
## X-squared = 8451300, df = 3276, p-value < 2.2e-16

5)salary and satis

chisq.test(salary,satis)
## Warning in chisq.test(salary, satis): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  salary
## X-squared = 8451300, df = 3276, p-value < 2.2e-16

T-tests 1)salary and gender

t.test(employed$salary,employed$sex)
## 
##  Welch Two Sample t-test
## 
## data:  employed$salary and employed$sex
## t = 19.224, df = 137, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  69215.68 85088.04
## sample estimates:
##   mean of x   mean of y 
## 77153.12319     1.26087

2)salary and Age

t.test(employed$salary,employed$age)
## 
##  Welch Two Sample t-test
## 
## data:  employed$salary and employed$age
## t = 19.217, df = 137, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  69189.99 85062.35
## sample estimates:
##   mean of x   mean of y 
## 77153.12319    26.95652

3)salary and gmat_tot

t.test(employed$salary,employed$gmat_tot)
## 
##  Welch Two Sample t-test
## 
## data:  employed$salary and employed$gmat_tot
## t = 19.07, df = 137, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  68597.66 84470.03
## sample estimates:
##  mean of x  mean of y 
## 77153.1232   619.2754

4)salary and work years

t.test(employed$salary,employed$work_yrs)
## 
##  Welch Two Sample t-test
## 
## data:  employed$salary and employed$work_yrs
## t = 19.223, df = 137, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  69213.28 85085.64
## sample estimates:
##    mean of x    mean of y 
## 77153.123188     3.666667

5)salary and satis

t.test(employed$salary,employed$satis)
## 
##  Welch Two Sample t-test
## 
## data:  employed$salary and employed$satis
## t = 19.223, df = 137, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  69211.41 85083.77
## sample estimates:
##    mean of x    mean of y 
## 77153.123188     5.528986

Regression analysis model 1

model1<-lm(salary ~ age+sex+gmat_tpc+frstlang+quarter+work_yrs, data=employed)
summary(model1)
## 
## Call:
## lm(formula = salary ~ age + sex + gmat_tpc + frstlang + quarter + 
##     work_yrs, data = employed)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -101412  -29018   14532   27276  142207 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   155229      66812   2.323  0.02170 * 
## age            -1778       2459  -0.723  0.47081   
## sex             9460       8977   1.054  0.29387   
## gmat_tpc        -117        300  -0.390  0.69719   
## frstlang      -21882      13077  -1.673  0.09666 . 
## quarter        -9442       3436  -2.748  0.00684 **
## work_yrs        4151       2608   1.591  0.11391   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44950 on 131 degrees of freedom
## Multiple R-squared:  0.1309, Adjusted R-squared:  0.09105 
## F-statistic: 3.287 on 6 and 131 DF,  p-value: 0.0048

Regression analysis model 2

model2<-lm(salary ~ gmat_tot+s_avg+f_avg+work_yrs+satis, data=employed)
summary(model2)
## 
## Call:
## lm(formula = salary ~ gmat_tot + s_avg + f_avg + work_yrs + satis, 
##     data = employed)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -102471  -20817    8022   25893  123201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -60752.4    48719.8  -1.247  0.21461    
## gmat_tot      -123.8       63.0  -1.965  0.05147 .  
## s_avg        32579.5    10397.5   3.133  0.00213 ** 
## f_avg        -1093.7     8894.6  -0.123  0.90232    
## work_yrs      1060.2     1301.6   0.815  0.41678    
## satis        20852.6     2989.6   6.975 1.33e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38620 on 132 degrees of freedom
## Multiple R-squared:  0.3533, Adjusted R-squared:  0.3288 
## F-statistic: 14.42 on 5 and 132 DF,  p-value: 2.902e-11

MOdel 1 is more suitable.

TASK 2c

COMPARING THOSE WHO GOT A JOB WITH THOSE WHO DID NOT GET A JOB.

subset of unemployed.

unemployed<-subset(salary,salary== 0)
Employed<-employed[1:90,]

chi-square tests 1)age of people with job and without job

chisq.test(Employed$age,unemployed$age)
## Warning in chisq.test(Employed$age, unemployed$age): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Employed$age and unemployed$age
## X-squared = 277.03, df = 252, p-value = 0.1336

2)work experience of people with job and without job

chisq.test(Employed$work_yrs,unemployed$work_yrs)
## Warning in chisq.test(Employed$work_yrs, unemployed$work_yrs): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Employed$work_yrs and unemployed$work_yrs
## X-squared = 168.43, df = 176, p-value = 0.6456

3)gender of with job and without job people

chisq.test(Employed$sex,unemployed$sex)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  Employed$sex and unemployed$sex
## X-squared = 0.0035941, df = 1, p-value = 0.9522

4)gmat total scores comparison

chisq.test(Employed$gmat_tot,unemployed$gmat_tot)
## Warning in chisq.test(Employed$gmat_tot, unemployed$gmat_tot): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Employed$gmat_tot and unemployed$gmat_tot
## X-squared = 482.83, df = 550, p-value = 0.9819

5)gmat pecentile comparison

chisq.test(Employed$gmat_tpc,unemployed$gmat_tpc)
## Warning in chisq.test(Employed$gmat_tpc, unemployed$gmat_tpc): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Employed$gmat_tpc and unemployed$gmat_tpc
## X-squared = 832.7, df = 812, p-value = 0.2995

6)gmat_vpc,gmat_qpc,s_avg and f_avg

chisq.test(Employed$gmat_vpc,unemployed$gmat_vpc)
## Warning in chisq.test(Employed$gmat_vpc, unemployed$gmat_vpc): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Employed$gmat_vpc and unemployed$gmat_vpc
## X-squared = 679.89, df = 625, p-value = 0.06324
chisq.test(Employed$gmat_qpc,unemployed$gmat_qpc)
## Warning in chisq.test(Employed$gmat_qpc, unemployed$gmat_qpc): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Employed$gmat_qpc and unemployed$gmat_qpc
## X-squared = 1417.6, df = 1295, p-value = 0.009397
chisq.test(Employed$s_avg,unemployed$s_avg)
## Warning in chisq.test(Employed$s_avg, unemployed$s_avg): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Employed$s_avg and unemployed$s_avg
## X-squared = 412.8, df = 390, p-value = 0.2048
chisq.test(Employed$f_avg,unemployed$f_avg)
## Warning in chisq.test(Employed$f_avg, unemployed$f_avg): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Employed$f_avg and unemployed$f_avg
## X-squared = 240.27, df = 238, p-value = 0.4465

Logistic regression

library(Amelia)
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2017 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
Obs<-subset(salary, salary != 998  & salary != 999 & salary!=0)
missmap(Obs, main = "Missing values vs observed")

Obs$employed <- ifelse(Obs$salary > 0,1, 0)

splitting dataset

c1<-Obs[1:96,]
c2<-Obs[96:193,]

annova test for c1

nmodel <- glm(formula = employed ~ age+gmat_tpc+gmat_qpc+gmat_vpc+frstlang+quarter, family = binomial(link = "logit"), data = c1)
anova(nmodel,test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: employed
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL                        95 0.0000e+00         
## age       1        0        94 5.5695e-10        1
## gmat_tpc  1        0        93 5.5695e-10        1
## gmat_qpc  1        0        92 5.5695e-10        1
## gmat_vpc  1        0        91 5.5695e-10        1
## frstlang  1        0        90 5.5695e-10        1
## quarter   1        0        89 5.5695e-10        1

annova test for c2

nmodel <- glm(formula = employed ~ age+gmat_tpc+gmat_qpc+gmat_vpc+frstlang+quarter, family = binomial(link = "logit"), data = c2)
anova(nmodel,test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: employed
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL                         7 0.0000e+00         
## age       1        0         6 3.4294e-10        1
## gmat_tpc  1        0         5 3.4294e-10        1
## gmat_qpc  1        0         4 3.4294e-10        1
## gmat_vpc  1        0         3 3.4294e-10        1
## frstlang  1        0         2 3.4294e-10        1
## quarter   0        0         2 3.4294e-10