# Load the MBA starting-salary survey and print per-column descriptive statistics.
# NOTE(review): setwd() hard-codes one machine's directory layout; it is kept in
# case other relative paths in the script rely on it — consider a project-relative path.
setwd("~/winter internship")
# The file name is a single literal, so no paste() call is needed to build it.
sal <- read.csv("MBA Starting Salaries Data.csv")
# View() opens a data viewer; only do that when someone is at the console.
if (interactive()) {
  View(sal)
}
library(psych)
describe(sal)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45
# Horizontal box plots of salary, one box per sex code (groups 1 and 2).
# NOTE(review): `sal` still holds the salary values 0, 998 and 999 that are
# filtered out later when `job` is built — confirm they belong in this plot.
boxplot(
  salary ~ sex,
  data = sal,
  horizontal = TRUE,
  las = 1,
  main = "Salary distribution based on gender",
  xlab = "salary",
  ylab = "gender"
)
# Name the two groups on the right-hand axis.
axis(side = 4, at = 1:2, labels = c("male", "female"))

# Horizontal box plots of salary, one box per value of `quarter`.
boxplot(
  salary ~ quarter,
  data = sal,
  horizontal = TRUE,
  las = 1,
  main = "Salary distribution based on quartile ranking",
  xlab = "salary",
  ylab = "quarter"
)

# Horizontal box plots of salary, one box per distinct value of `work_yrs`.
boxplot(
  salary ~ work_yrs,
  data = sal,
  horizontal = TRUE,
  las = 1,
  main = "Salary distribution based on work experience",
  xlab = "salary",
  ylab = "work experience"
)

# Horizontal box plots of salary, one box per first-language code (groups 1 and 2).
boxplot(
  salary ~ frstlang,
  data = sal,
  horizontal = TRUE,
  las = 1,
  main = "Salary distribution based on first language spoken",
  xlab = "salary",
  ylab = "language"
)
# Name the two groups on the right-hand axis.
axis(side = 4, at = 1:2, labels = c("english", "other"))

# Horizontal box plots of salary, one box per distinct value of `satis`.
# NOTE(review): describe(sal) reports a maximum of 998 for satis, which looks
# like a placeholder code rather than a rating — confirm before interpreting.
boxplot(
  salary ~ satis,
  data = sal,
  horizontal = TRUE,
  las = 1,
  main = "Salary distribution based on satisfaction",
  xlab = "salary",
  ylab = "salary satisfaction"
)

library(lattice)
# Lattice bar chart of `satis` against `salary` over the full data set.
barchart(
  x = satis ~ salary,
  data = sal,
  col = "orange"
)

# Cross-tabulate salary values by first-language code and draw one stacked bar
# per language code. Bar heights are record counts, not salary amounts.
x3 <- table(sal$salary, sal$frstlang)
barplot(
  x3,
  main = "Starting Salary vs language",
  xlab = "language-english or others",
  # The y axis is a count of records, so label it the same way as the other
  # count bar plots in this script.
  ylab = "Starting Salary count",
  col = "red"
)

# Lattice bar chart of `quarter` against `salary` over the full data set.
# lattice is already attached a few statements earlier, so it is not loaded again.
barchart(
  x = quarter ~ salary,
  data = sal,
  col = "orange"
)

# Cross-tabulate salary values by `quarter` and draw one stacked bar per quarter;
# bar heights are record counts.
x4 <- table(sal$salary, sal$quarter)
# The original call had an empty second positional argument (`x4, , xlab`),
# which only worked because it fell through to a defaulted parameter.
barplot(
  x4,
  xlab = "quartile percentile",
  ylab = "Starting Salary count",
  col = "red"
)

# Cross-tabulate salary values by years of work experience and draw one stacked
# bar per `work_yrs` value; bar heights are record counts.
x2 <- table(sal$salary, sal$work_yrs)
# The stray empty positional argument from the original call has been removed.
barplot(
  x2,
  xlab = "work experience",
  ylab = "Starting Salary count",
  col = "red"
)

# Cross-tabulate salary values by sex code and draw one stacked bar per code;
# bar heights are record counts.
x1 <- table(sal$salary, sal$sex)
# The stray empty positional argument from the original call has been removed.
barplot(
  x1,
  xlab = "Sex",
  ylab = "Starting Salary count",
  col = "red"
)

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatter plots of starting salary against each numeric predictor.
# car is attached once just above; the seven copy-pasted calls (each preceded by
# another library(car)) are driven from a single column -> title lookup instead.
# Titles are kept exactly as they were.
scatter_titles <- c(
  age      = "scatterplot- Starting Salary vs age ",
  gmat_qpc = "scatterplot- Starting Salary vs gmat quantitative percentile",
  gmat_tpc = "scatterplot- Starting Salary vs  total gmat percentile",
  gmat_vpc = "scatterplot- Starting Salary vs verbal gmat percentile ",
  gmat_tot = "scatterplot- Starting Salary vs total gmat score ",
  s_avg    = "scatterplot- Starting Salary vs summer mba average ",
  f_avg    = "scatterplot- Starting Salary vs fall mba average "
)
for (predictor in names(scatter_titles)) {
  scatterplot(
    sal[[predictor]], sal$salary,
    main = scatter_titles[[predictor]],
    # Label the axes with column names instead of the deparsed `sal$...` expression.
    xlab = predictor,
    ylab = "salary",
    pch = 16
  )
}

library(corrgram)
# Correlogram of every column in `sal`: shaded cells below the diagonal, pie
# glyphs above it, variable names on the diagonal; order = TRUE reorders variables.
corrgram(
  sal,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)

# Pairwise correlations between all columns of `sal`, shown to two decimals.
# NOTE(review): computed on the unfiltered data, i.e. before the 0/998/999
# salary values are removed to build `job`.
round(cor(sal), digits = 2)
##            age   sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg
## age       1.00 -0.03    -0.15    -0.22    -0.04    -0.17  0.15 -0.02
## sex      -0.03  1.00    -0.05    -0.16     0.07    -0.01  0.13  0.09
## gmat_tot -0.15 -0.05     1.00     0.72     0.75     0.85  0.11  0.10
## gmat_qpc -0.22 -0.16     0.72     1.00     0.15     0.65 -0.03  0.07
## gmat_vpc -0.04  0.07     0.75     0.15     1.00     0.67  0.20  0.08
## gmat_tpc -0.17 -0.01     0.85     0.65     0.67     1.00  0.12  0.08
## s_avg     0.15  0.13     0.11    -0.03     0.20     0.12  1.00  0.55
## f_avg    -0.02  0.09     0.10     0.07     0.08     0.08  0.55  1.00
## quarter  -0.05 -0.13    -0.09     0.04    -0.17    -0.08 -0.76 -0.45
## work_yrs  0.86 -0.01    -0.18    -0.24    -0.07    -0.17  0.13 -0.04
## frstlang  0.06  0.00    -0.14     0.14    -0.39    -0.10 -0.14 -0.04
## salary   -0.06  0.07    -0.05    -0.04    -0.01     0.00  0.15  0.03
## satis    -0.13 -0.05     0.08     0.06     0.06     0.09 -0.03  0.01
##          quarter work_yrs frstlang salary satis
## age        -0.05     0.86     0.06  -0.06 -0.13
## sex        -0.13    -0.01     0.00   0.07 -0.05
## gmat_tot   -0.09    -0.18    -0.14  -0.05  0.08
## gmat_qpc    0.04    -0.24     0.14  -0.04  0.06
## gmat_vpc   -0.17    -0.07    -0.39  -0.01  0.06
## gmat_tpc   -0.08    -0.17    -0.10   0.00  0.09
## s_avg      -0.76     0.13    -0.14   0.15 -0.03
## f_avg      -0.45    -0.04    -0.04   0.03  0.01
## quarter     1.00    -0.09     0.10  -0.16  0.00
## work_yrs   -0.09     1.00    -0.03   0.01 -0.11
## frstlang    0.10    -0.03     1.00  -0.09  0.08
## salary     -0.16     0.01    -0.09   1.00 -0.34
## satis       0.00    -0.11     0.08  -0.34  1.00
 # Covariance matrix of the continuous columns of `sal`.
 # `y` is the same column selection as `x`, so it is copied rather than re-selected.
 x <- sal[, c(
   "age", "gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc",
   "s_avg", "f_avg", "work_yrs", "salary"
 )]
 y <- x
 cov(x, y)
##                    age      gmat_tot      gmat_qpc     gmat_vpc
## age       1.376904e+01 -3.115879e+01 -1.192655e+01    -2.763643
## gmat_tot -3.115879e+01  3.310688e+03  6.200233e+02   726.000642
## gmat_qpc -1.192655e+01  6.200233e+02  2.210731e+02    38.148258
## gmat_vpc -2.763643e+00  7.260006e+02  3.814826e+01   284.248122
## gmat_tpc -8.839978e+00  6.839911e+02  1.357997e+02   157.493249
## s_avg     2.116874e-01  2.480257e+00 -1.691233e-01     1.313570
## f_avg    -3.399348e-02  3.154688e+00  5.753854e-01     0.672070
## work_yrs  1.029494e+01 -3.391634e+01 -1.137186e+01    -3.618165
## salary   -1.183042e+04 -1.611600e+05 -3.335823e+04 -5273.852384
##              gmat_tpc        s_avg        f_avg     work_yrs        salary
## age        -8.8399775    0.2116874  -0.03399348   10.2949386 -1.183042e+04
## gmat_tot  683.9910698    2.4802572   3.15468838  -33.9163391 -1.611600e+05
## gmat_qpc  135.7996845   -0.1691233   0.57538542  -11.3718617 -3.335823e+04
## gmat_vpc  157.4932488    1.3135702   0.67207000   -3.6181653 -5.273852e+03
## gmat_tpc  196.6057057    0.6271001   0.58698618   -7.8575172  3.522750e+03
## s_avg       0.6271001    0.1452176   0.11016898    0.1592639  2.831601e+03
## f_avg       0.5869862    0.1101690   0.27567237   -0.0662870  7.876560e+02
## work_yrs   -7.8575172    0.1592639  -0.06628700   10.4488249  1.486147e+03
## salary   3522.7500067 2831.6009858 787.65597177 1486.1470415  2.596062e+09
# Keep only the rows whose salary is an actual amount.
# The values 0, 998 and 999 are excluded; presumably they are placeholder codes
# (describe(sal) shows a median salary of 999) — TODO confirm against the codebook.
# The column is compared as numbers rather than against string literals, and
# missing salaries are dropped explicitly (the original which() dropped them implicitly).
excluded_salary_codes <- c(0, 998, 999)
job <- sal[!is.na(sal$salary) & !(sal$salary %in% excluded_salary_codes), ]
head(job)
##    age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35  22   2      660       90       92       94   3.5  3.75       1
## 36  27   2      700       94       98       98   3.3  3.25       1
## 37  25   2      680       87       96       96   3.5  2.67       1
## 38  25   2      650       82       91       93   3.4  3.25       1
## 39  27   1      710       96       96       98   3.3  3.50       1
## 40  28   2      620       52       98       87   3.4  3.75       1
##    work_yrs frstlang salary satis
## 35        1        1  85000     5
## 36        2        1  85000     6
## 37        2        1  86000     5
## 38        3        1  88000     7
## 39        2        1  92000     6
## 40        5        1  93000     5
    # View() opens a data viewer; only do that when someone is at the console.
    if (interactive()) View(job)

# Chi-square tests

# Pearson chi-squared test on the cross-tabulation of job$age and job$salary
# (each distinct value is its own category).
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$age, job$salary)
## Warning in chisq.test(job$age, job$salary): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$age and job$salary
## X-squared = 717.62, df = 574, p-value = 3.929e-05
# Pearson chi-squared test on the cross-tabulation of job$sex and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$sex, job$salary)
## Warning in chisq.test(job$sex, job$salary): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$sex and job$salary
## X-squared = 52.681, df = 41, p-value = 0.1045
# Pearson chi-squared test on the cross-tabulation of job$gmat_tot and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$gmat_tot, job$salary)
## Warning in chisq.test(job$gmat_tot, job$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$gmat_tot and job$salary
## X-squared = 927.24, df = 820, p-value = 0.005279
# Pearson chi-squared test on the cross-tabulation of job$gmat_qpc and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$gmat_qpc, job$salary)
## Warning in chisq.test(job$gmat_qpc, job$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$gmat_qpc and job$salary
## X-squared = 1464.3, df = 1353, p-value = 0.018
# Pearson chi-squared test on the cross-tabulation of job$gmat_vpc and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$gmat_vpc, job$salary)
## Warning in chisq.test(job$gmat_vpc, job$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$gmat_vpc and job$salary
## X-squared = 1183.3, df = 1066, p-value = 0.006802
# Pearson chi-squared test on the cross-tabulation of job$gmat_tpc and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$gmat_tpc, job$salary)
## Warning in chisq.test(job$gmat_tpc, job$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$gmat_tpc and job$salary
## X-squared = 1422.2, df = 1230, p-value = 0.0001065
# Pearson chi-squared test on the cross-tabulation of job$s_avg and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$s_avg, job$salary)
## Warning in chisq.test(job$s_avg, job$salary): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$s_avg and job$salary
## X-squared = 792.97, df = 861, p-value = 0.9524
# Pearson chi-squared test on the cross-tabulation of job$f_avg and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$f_avg, job$salary)
## Warning in chisq.test(job$f_avg, job$salary): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$f_avg and job$salary
## X-squared = 596.28, df = 574, p-value = 0.2518
# Pearson chi-squared test on the cross-tabulation of job$quarter and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$quarter, job$salary)
## Warning in chisq.test(job$quarter, job$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$quarter and job$salary
## X-squared = 129.85, df = 123, p-value = 0.3186
# Pearson chi-squared test on the cross-tabulation of job$satis and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$satis, job$salary)
## Warning in chisq.test(job$satis, job$salary): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$satis and job$salary
## X-squared = 109.1, df = 164, p-value = 0.9997
# Pearson chi-squared test on the cross-tabulation of job$work_yrs and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$work_yrs, job$salary)
## Warning in chisq.test(job$work_yrs, job$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$work_yrs and job$salary
## X-squared = 535.23, df = 451, p-value = 0.003809
# Pearson chi-squared test on the cross-tabulation of job$frstlang and job$salary.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect; treat the p-value with caution.
chisq.test(job$frstlang, job$salary)
## Warning in chisq.test(job$frstlang, job$salary): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$frstlang and job$salary
## X-squared = 69.847, df = 41, p-value = 0.003296
# NOTE(review): repeats the job$sex vs job$salary chi-squared test already run
# earlier in this section; the recorded result is identical (X-squared = 52.681).
chisq.test(job$sex, job$salary)
## Warning in chisq.test(job$sex, job$salary): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  job$sex and job$salary
## X-squared = 52.681, df = 41, p-value = 0.1045

# t-tests

# Welch two-sample t-test with job$age as one sample and job$salary as the other.
# NOTE(review): this compares the mean of age with the mean of salary, so it does
# not test whether salary depends on age — consider cor.test() or a regression.
t.test(job$age, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$age and job$salary
## t = -58.503, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106496.23  -99511.69
## sample estimates:
##   mean of x   mean of y 
##     26.7767 103030.7379
# Welch two-sample t-test with the sex codes as one sample and salaries as the other.
# NOTE(review): this compares mean(job$sex) with mean(job$salary); to compare
# salary between the two sex groups, t.test(salary ~ sex, data = job) is
# probably what was intended.
t.test(job$sex, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$sex and job$salary
## t = -58.517, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106521.71  -99537.17
## sample estimates:
##    mean of x    mean of y 
## 1.300971e+00 1.030307e+05
# Welch two-sample t-test of job$gmat_tot against job$salary.
# NOTE(review): compares mean(job$gmat_tot) with mean(job$salary), not the
# association between the two columns.
t.test(job$gmat_tot, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$gmat_tot and job$salary
## t = -58.168, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -105907.00  -98922.43
## sample estimates:
##   mean of x   mean of y 
##    616.0194 103030.7379
# Welch two-sample t-test of job$gmat_qpc against job$salary.
# NOTE(review): compares mean(job$gmat_qpc) with mean(job$salary), not the
# association between the two columns.
t.test(job$gmat_qpc, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$gmat_qpc and job$salary
## t = -58.473, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106443.28  -99458.74
## sample estimates:
##    mean of x    mean of y 
##     79.72816 103030.73786
# Welch two-sample t-test of job$gmat_vpc against job$salary.
# NOTE(review): compares mean(job$gmat_vpc) with mean(job$salary), not the
# association between the two columns.
t.test(job$gmat_vpc, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$gmat_vpc and job$salary
## t = -58.473, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106444.4  -99459.9
## sample estimates:
##    mean of x    mean of y 
##     78.56311 103030.73786
# Welch two-sample t-test of job$gmat_tpc against job$salary.
# NOTE(review): compares mean(job$gmat_tpc) with mean(job$salary), not the
# association between the two columns.
t.test(job$gmat_tpc, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$gmat_tpc and job$salary
## t = -58.47, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106438.49  -99453.94
## sample estimates:
##    mean of x    mean of y 
##     84.52427 103030.73786
# Welch two-sample t-test of job$s_avg against job$salary.
# NOTE(review): compares mean(job$s_avg) with mean(job$salary), not the
# association between the two columns.
t.test(job$s_avg, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$s_avg and job$salary
## t = -58.516, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106519.92  -99535.37
## sample estimates:
##    mean of x    mean of y 
##      3.09233 103030.73786
# Welch two-sample t-test of job$f_avg against job$salary.
# NOTE(review): compares mean(job$f_avg) with mean(job$salary), not the
# association between the two columns.
t.test(job$f_avg, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$f_avg and job$salary
## t = -58.516, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106519.92  -99535.38
## sample estimates:
##    mean of x    mean of y 
## 3.090971e+00 1.030307e+05
# Welch two-sample t-test of job$quarter against job$salary.
# NOTE(review): compares mean(job$quarter) with mean(job$salary), not how
# salary differs across quarters.
t.test(job$quarter, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$quarter and job$salary
## t = -58.517, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106520.7  -99536.2
## sample estimates:
##    mean of x    mean of y 
## 2.262136e+00 1.030307e+05
# Welch two-sample t-test of job$satis against job$salary.
# NOTE(review): compares mean(job$satis) with mean(job$salary), not the
# association between the two columns.
t.test(job$satis, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$satis and job$salary
## t = -58.515, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106517.13  -99532.58
## sample estimates:
##    mean of x    mean of y 
## 5.883495e+00 1.030307e+05
# Welch two-sample t-test of job$work_yrs against job$salary.
# NOTE(review): compares mean(job$work_yrs) with mean(job$salary), not the
# association between the two columns.
t.test(job$work_yrs, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$work_yrs and job$salary
## t = -58.516, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106519.33  -99534.79
## sample estimates:
##    mean of x    mean of y 
## 3.679612e+00 1.030307e+05
# Welch two-sample t-test with the first-language codes as one sample and
# salaries as the other.
# NOTE(review): this compares mean(job$frstlang) with mean(job$salary); to
# compare salary between the two language groups,
# t.test(salary ~ frstlang, data = job) is probably what was intended.
t.test(job$frstlang, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$frstlang and job$salary
## t = -58.517, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106521.9  -99537.4
## sample estimates:
##    mean of x    mean of y 
## 1.067961e+00 1.030307e+05
# NOTE(review): repeats the job$sex vs job$salary t-test already run earlier in
# this section; the recorded result is identical (t = -58.517).
t.test(job$sex, job$salary)
## 
##  Welch Two Sample t-test
## 
## data:  job$sex and job$salary
## t = -58.517, df = 102, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -106521.71  -99537.17
## sample estimates:
##    mean of x    mean of y 
## 1.300971e+00 1.030307e+05

# Contingency tables

# Count of placed graduates for every (salary, sex) combination.
mytable <- xtabs(formula = ~ salary + sex, data = job)
print(mytable)
##         sex
## salary    1  2
##   64000   0  1
##   77000   1  0
##   78256   0  1
##   82000   0  1
##   85000   1  3
##   86000   0  2
##   88000   0  1
##   88500   1  0
##   90000   3  0
##   92000   2  1
##   93000   2  1
##   95000   4  3
##   96000   3  1
##   96500   1  0
##   97000   2  0
##   98000   6  4
##   99000   0  1
##   100000  4  5
##   100400  1  0
##   101000  0  2
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  2  0
##   105000 11  0
##   106000  2  1
##   107000  1  0
##   107300  1  0
##   107500  1  0
##   108000  2  0
##   110000  0  1
##   112000  3  0
##   115000  5  0
##   118000  1  0
##   120000  3  1
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1
 # Count of placed graduates for every (salary, work_yrs) combination.
 mytable1 <- xtabs(formula = ~ salary + work_yrs, data = job)
 print(mytable1)
##         work_yrs
## salary   0 1 2 3 4 5 6 7 8 10 15 16
##   64000  0 0 1 0 0 0 0 0 0  0  0  0
##   77000  0 0 1 0 0 0 0 0 0  0  0  0
##   78256  0 1 0 0 0 0 0 0 0  0  0  0
##   82000  0 1 0 0 0 0 0 0 0  0  0  0
##   85000  0 1 2 1 0 0 0 0 0  0  0  0
##   86000  0 0 1 1 0 0 0 0 0  0  0  0
##   88000  0 0 0 1 0 0 0 0 0  0  0  0
##   88500  0 0 0 1 0 0 0 0 0  0  0  0
##   90000  0 0 2 0 0 1 0 0 0  0  0  0
##   92000  0 0 3 0 0 0 0 0 0  0  0  0
##   93000  0 0 0 0 1 1 0 0 1  0  0  0
##   95000  1 1 2 2 0 1 0 0 0  0  0  0
##   96000  0 1 2 0 1 0 0 0 0  0  0  0
##   96500  0 0 1 0 0 0 0 0 0  0  0  0
##   97000  0 0 0 1 1 0 0 0 0  0  0  0
##   98000  0 0 7 1 1 0 0 1 0  0  0  0
##   99000  0 0 0 0 0 1 0 0 0  0  0  0
##   100000 0 0 6 1 1 0 1 0 0  0  0  0
##   100400 0 0 0 1 0 0 0 0 0  0  0  0
##   101000 0 0 2 0 0 0 0 0 0  0  0  0
##   101100 0 0 0 0 0 0 0 0 1  0  0  0
##   101600 0 0 0 1 0 0 0 0 0  0  0  0
##   102500 0 0 0 0 0 0 1 0 0  0  0  0
##   103000 0 0 0 1 0 0 0 0 0  0  0  0
##   104000 0 0 0 0 2 0 0 0 0  0  0  0
##   105000 0 0 4 4 0 1 1 0 0  0  0  1
##   106000 0 0 0 0 0 0 2 0 1  0  0  0
##   107000 0 0 1 0 0 0 0 0 0  0  0  0
##   107300 0 0 1 0 0 0 0 0 0  0  0  0
##   107500 0 0 0 1 0 0 0 0 0  0  0  0
##   108000 0 0 0 1 1 0 0 0 0  0  0  0
##   110000 0 0 0 0 0 0 1 0 0  0  0  0
##   112000 0 0 1 0 0 0 1 0 0  0  0  1
##   115000 0 2 0 1 2 0 0 0 0  0  0  0
##   118000 0 0 0 0 0 0 0 0 0  1  0  0
##   120000 0 0 0 1 0 2 0 0 1  0  0  0
##   126710 0 0 0 1 0 0 0 0 0  0  0  0
##   130000 0 0 0 0 1 0 0 0 0  0  0  0
##   145800 0 0 1 0 0 0 0 0 0  0  0  0
##   146000 0 0 0 0 0 0 0 0 0  0  1  0
##   162000 0 1 0 0 0 0 0 0 0  0  0  0
##   220000 0 0 0 0 0 0 0 0 0  0  1  0
 mytable2<-xtabs(~salary+frstlang,data=job)
    mytable2
##         frstlang
## salary    1  2
##   64000   1  0
##   77000   1  0
##   78256   1  0
##   82000   1  0
##   85000   4  0
##   86000   2  0
##   88000   1  0
##   88500   1  0
##   90000   3  0
##   92000   3  0
##   93000   3  0
##   95000   7  0
##   96000   4  0
##   96500   1  0
##   97000   2  0
##   98000   8  2
##   99000   0  1
##   100000  9  0
##   100400  1  0
##   101000  2  0
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  1  1
##   105000 11  0
##   106000  3  0
##   107000  1  0
##   107300  0  1
##   107500  1  0
##   108000  2  0
##   110000  1  0
##   112000  3  0
##   115000  5  0
##   118000  0  1
##   120000  4  0
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1
# Count of placed graduates for every (salary, gmat_tot) combination.
mytable3 <- xtabs(formula = ~ salary + gmat_tot, data = job)
print(mytable3)
##         gmat_tot
## salary   500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
##   64000    0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
##   77000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   78256    0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   85000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   86000    0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   88000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   88500    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   90000    0   0   0   0   0   0   0   1   0   0   0   0   1   0   1   0
##   92000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   93000    0   0   0   1   0   0   0   0   0   0   1   1   0   0   0   0
##   95000    0   0   1   0   0   2   0   0   0   0   2   0   0   0   0   0
##   96000    0   0   0   0   0   1   0   0   1   1   0   0   0   0   1   0
##   96500    1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   1   0   0   0   1   0   0   0   0
##   98000    0   0   0   0   0   1   3   1   1   0   1   0   0   0   0   0
##   99000    0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
##   100000   0   0   0   0   0   2   0   1   0   1   1   0   1   0   2   0
##   100400   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   101000   0   0   0   0   0   0   0   0   0   1   0   1   0   0   0   0
##   101100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   101600   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   102500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   104000   0   0   1   0   0   1   0   0   0   0   0   0   0   0   0   0
##   105000   0   0   0   0   2   0   2   3   0   1   0   1   0   0   1   0
##   106000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   107000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   107300   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   107500   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   108000   0   0   0   0   0   0   1   0   0   1   0   0   0   0   0   0
##   110000   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   112000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   115000   0   0   0   1   0   0   1   0   0   0   0   1   1   0   0   0
##   118000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   120000   0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0
##   126710   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
##   130000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   145800   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   146000   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   162000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   220000   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##         gmat_tot
## salary   670 680 700 710 720
##   64000    0   0   0   0   0
##   77000    0   0   0   0   0
##   78256    0   0   0   0   0
##   82000    1   0   0   0   0
##   85000    0   0   1   0   1
##   86000    0   1   0   0   0
##   88000    0   0   0   0   0
##   88500    0   0   0   0   0
##   90000    0   0   0   0   0
##   92000    0   0   0   1   0
##   93000    0   0   0   0   0
##   95000    2   0   0   0   0
##   96000    0   0   0   0   0
##   96500    0   0   0   0   0
##   97000    0   0   0   0   0
##   98000    1   1   0   1   0
##   99000    0   0   0   0   0
##   100000   0   0   0   1   0
##   100400   0   0   0   0   0
##   101000   0   0   0   0   0
##   101100   0   0   0   0   0
##   101600   0   0   0   0   0
##   102500   1   0   0   0   0
##   103000   0   0   0   0   0
##   104000   0   0   0   0   0
##   105000   0   1   0   0   0
##   106000   0   2   0   0   0
##   107000   0   0   0   0   0
##   107300   0   0   0   0   0
##   107500   0   0   0   0   0
##   108000   0   0   0   0   0
##   110000   0   0   0   0   0
##   112000   1   1   0   0   0
##   115000   0   0   0   1   0
##   118000   0   0   0   0   0
##   120000   1   0   1   0   0
##   126710   0   0   0   0   0
##   130000   0   0   0   0   0
##   145800   0   0   0   0   0
##   146000   0   0   0   0   0
##   162000   0   0   1   0   0
##   220000   0   0   0   0   0
# Count of placed graduates for every (salary, age) combination.
mytable4 <- xtabs(formula = ~ salary + age, data = job)
print(mytable4)
##         age
## salary   22 23 24 25 26 27 28 29 30 31 32 33 34 39 40
##   64000   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   77000   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   78256   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   85000   1  0  0  1  1  1  0  0  0  0  0  0  0  0  0
##   86000   0  0  0  1  1  0  0  0  0  0  0  0  0  0  0
##   88000   0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   88500   0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   90000   0  0  0  2  0  1  0  0  0  0  0  0  0  0  0
##   92000   0  0  0  2  0  1  0  0  0  0  0  0  0  0  0
##   93000   0  0  0  1  0  0  1  0  0  1  0  0  0  0  0
##   95000   0  0  1  5  0  0  0  1  0  0  0  0  0  0  0
##   96000   0  0  1  1  2  0  0  0  0  0  0  0  0  0  0
##   96500   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  0  0  1  1  0  0  0  0  0  0  0  0
##   98000   0  1  3  2  1  1  1  1  0  0  0  0  0  0  0
##   99000   0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
##   100000  0  1  4  1  1  1  0  0  0  1  0  0  0  0  0
##   100400  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   101000  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   101600  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0
##   103000  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0
##   105000  0  1  1  2  3  1  0  0  1  1  0  0  1  0  0
##   106000  0  0  0  0  0  0  0  1  2  0  0  0  0  0  0
##   107000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   107300  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
##   107500  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0
##   110000  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
##   112000  0  0  1  0  0  0  0  1  0  0  0  0  0  1  0
##   115000  0  0  1  1  0  3  0  0  0  0  0  0  0  0  0
##   118000  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0
##   120000  0  0  0  0  0  1  1  0  2  0  0  0  0  0  0
##   126710  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   145800  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
##   162000  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   220000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
# Count of placed graduates for every (salary, satis) combination.
mytable5 <- xtabs(formula = ~ salary + satis, data = job)
print(mytable5)
##         satis
## salary   3 4 5 6 7
##   64000  0 0 0 0 1
##   77000  0 0 0 1 0
##   78256  0 0 1 0 0
##   82000  0 0 0 0 1
##   85000  0 0 1 3 0
##   86000  0 0 2 0 0
##   88000  0 0 0 0 1
##   88500  0 0 0 1 0
##   90000  0 0 2 0 1
##   92000  0 0 1 1 1
##   93000  0 0 1 2 0
##   95000  1 1 1 2 2
##   96000  0 0 1 1 2
##   96500  0 0 0 1 0
##   97000  0 0 0 1 1
##   98000  0 0 2 5 3
##   99000  0 0 0 1 0
##   100000 0 0 1 6 2
##   100400 0 0 0 0 1
##   101000 0 0 1 1 0
##   101100 0 0 0 1 0
##   101600 0 0 0 1 0
##   102500 0 0 1 0 0
##   103000 0 0 0 1 0
##   104000 0 0 1 1 0
##   105000 0 0 4 6 1
##   106000 0 0 0 2 1
##   107000 0 0 1 0 0
##   107300 0 0 0 0 1
##   107500 0 0 1 0 0
##   108000 0 0 0 2 0
##   110000 0 0 1 0 0
##   112000 0 0 0 2 1
##   115000 0 0 3 2 0
##   118000 0 0 0 0 1
##   120000 0 0 2 2 0
##   126710 0 0 0 1 0
##   130000 0 0 0 0 1
##   145800 0 0 0 1 0
##   146000 0 0 0 1 0
##   162000 0 0 1 0 0
##   220000 0 0 0 1 0

# Regression

# MODEL 1

 # Model 1: salary regressed on all twelve other columns of `job`.
 # The term order is kept so the coefficient table lists them in the same order.
 fit <- lm(
   formula = salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
     age + sex + quarter + frstlang + work_yrs + satis + f_avg + s_avg,
   data = job
 )
 summary(object = fit)
## 
## Call:
## lm(formula = salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc + 
##     age + sex + quarter + frstlang + work_yrs + satis + f_avg + 
##     s_avg, data = job)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26489  -7983   -373   5923  70602 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 78005.66   52981.93   1.472   0.1444  
## gmat_tot       16.19     178.85   0.090   0.9281  
## gmat_qpc      796.55     496.78   1.603   0.1123  
## gmat_vpc      546.31     501.97   1.088   0.2794  
## gmat_tpc    -1457.09     714.94  -2.038   0.0445 *
## age          1750.65    1130.92   1.548   0.1251  
## sex         -3584.07    3595.85  -0.997   0.3216  
## quarter     -2336.56    2721.89  -0.858   0.3929  
## frstlang     7719.42    7373.27   1.047   0.2979  
## work_yrs      749.66    1135.90   0.660   0.5110  
## satis       -1086.54    2157.76  -0.504   0.6158  
## f_avg       -2222.82    3894.57  -0.571   0.5696  
## s_avg        -931.53    8240.31  -0.113   0.9102  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15430 on 90 degrees of freedom
## Multiple R-squared:  0.3422, Adjusted R-squared:  0.2545 
## F-statistic: 3.902 on 12 and 90 DF,  p-value: 8.086e-05

# MODEL 2

# Model 2: the full predictor set without age and work_yrs.
# `fit` is reassigned, replacing the previous model object.
fit <- lm(
  formula = salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    sex + quarter + frstlang + satis + f_avg + s_avg,
  data = job
)
summary(object = fit)
## 
## Call:
## lm(formula = salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc + 
##     sex + quarter + frstlang + satis + f_avg + s_avg, data = job)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31452  -7909  -1321   6283  93310 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 109064.86   52356.52   2.083  0.04001 * 
## gmat_tot        53.22     193.43   0.275  0.78384   
## gmat_qpc       727.40     537.14   1.354  0.17899   
## gmat_vpc       606.30     541.39   1.120  0.26567   
## gmat_tpc     -1702.39     770.31  -2.210  0.02959 * 
## sex          -6306.25    3784.14  -1.666  0.09902 . 
## quarter      -2598.15    2946.61  -0.882  0.38021   
## frstlang     18737.05    6970.54   2.688  0.00853 **
## satis         -341.23    2327.82  -0.147  0.88378   
## f_avg        -7546.95    3989.74  -1.892  0.06169 . 
## s_avg         6097.52    8737.71   0.698  0.48704   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16710 on 92 degrees of freedom
## Multiple R-squared:  0.2115, Adjusted R-squared:  0.1258 
## F-statistic: 2.468 on 10 and 92 DF,  p-value: 0.01156

# MODEL 3

 # Model 3: the full predictor set without the two course averages (f_avg, s_avg).
 # `fit` is reassigned, replacing the previous model object.
 fit <- lm(
   formula = salary ~ gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
     age + sex + quarter + frstlang + work_yrs + satis,
   data = job
 )
 summary(object = fit)
## 
## Call:
## lm(formula = salary ~ gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc + 
##     age + sex + quarter + frstlang + work_yrs + satis, data = job)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -27095  -8075   -265   5590  70231 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 74344.83   51260.45   1.450   0.1504  
## gmat_tot      -14.53     164.63  -0.088   0.9299  
## gmat_qpc      834.28     479.73   1.739   0.0854 .
## gmat_tpc    -1406.80     703.65  -1.999   0.0485 *
## gmat_vpc      594.88     481.23   1.236   0.2195  
## age          1796.21    1111.80   1.616   0.1096  
## sex         -3888.24    3511.97  -1.107   0.2711  
## quarter     -1614.07    1443.38  -1.118   0.2664  
## frstlang     7745.73    7167.62   1.081   0.2827  
## work_yrs      796.13    1122.85   0.709   0.4801  
## satis       -1240.34    2107.59  -0.589   0.5576  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15290 on 92 degrees of freedom
## Multiple R-squared:  0.3395, Adjusted R-squared:  0.2677 
## F-statistic:  4.73 on 10 and 92 DF,  p-value: 1.877e-05

MODEL 4

 # Model 4: same predictors as Model 3 with work_yrs dropped.
 fit <- lm(
   salary ~ gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc +
     age + sex + quarter + frstlang + satis,
   data = job
 )
 summary(fit)
## 
## Call:
## lm(formula = salary ~ gmat_tot + gmat_qpc + gmat_tpc + gmat_vpc + 
##     age + sex + quarter + frstlang + satis, data = job)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -23910  -8097   -938   5154  71005 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 60845.48   47466.11   1.282   0.2031    
## gmat_tot      -14.14     164.19  -0.086   0.9315    
## gmat_qpc      818.90     477.95   1.713   0.0900 .  
## gmat_tpc    -1380.08     700.76  -1.969   0.0519 .  
## gmat_vpc      568.98     478.55   1.189   0.2375    
## age          2488.70     529.81   4.697 9.08e-06 ***
## sex         -3593.73    3477.98  -1.033   0.3042    
## quarter     -1610.02    1439.50  -1.118   0.2663    
## frstlang     6285.79    6847.12   0.918   0.3610    
## satis       -1270.86    2101.51  -0.605   0.5468    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15250 on 93 degrees of freedom
## Multiple R-squared:  0.3359, Adjusted R-squared:  0.2717 
## F-statistic: 5.227 on 9 and 93 DF,  p-value: 9.591e-06

MODEL 5

 # Model 5: reduced model using the three GMAT percentile columns together
 # with age, sex and quarter.
 fit <- lm(
   salary ~ gmat_qpc + gmat_tpc + gmat_vpc + age + sex + quarter,
   data = job
 )
 summary(fit)
## 
## Call:
## lm(formula = salary ~ gmat_qpc + gmat_tpc + gmat_vpc + age + 
##     sex + quarter, data = job)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -25414  -7424   -895   5108  72791 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  53235.7    20930.8   2.543   0.0126 *  
## gmat_qpc       836.3      349.0   2.396   0.0185 *  
## gmat_tpc     -1449.3      681.8  -2.126   0.0361 *  
## gmat_vpc       537.3      350.9   1.531   0.1290    
## age           2647.9      475.8   5.565 2.37e-07 ***
## sex          -2889.2     3340.4  -0.865   0.3892    
## quarter      -1651.4     1361.4  -1.213   0.2281    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15100 on 96 degrees of freedom
## Multiple R-squared:  0.3279, Adjusted R-squared:  0.2859 
## F-statistic: 7.807 on 6 and 96 DF,  p-value: 7.449e-07

Therefore, Model 5 is the best-suited model for explaining the variation in starting salaries, with an adjusted R-squared of 0.2859.

Age, gmat_tpc, and gmat_qpc are statistically significant; sex, quarter, and gmat_vpc are statistically insignificant.

Placed vs non Placed

# Recode salary into a two-level placement label in a single pass:
# a salary above 0 becomes "placed", anything else " not placed" (the leading
# space is kept so the labels match the tables printed below).
# Using one ifelse() on the still-numeric column avoids the previous second
# step, which compared the already-recoded character values against 1 and so
# relied on locale-dependent string collation.
# NOTE(review): the 998/999 salary codes are also > 0 and are therefore
# labelled "placed" -- confirm that this is intended.
sal$salary <- ifelse(sal$salary > 0, "placed", " not placed")

# Cross-tabulate sex against placement status and print the table.
table1 <- xtabs(~ sex + salary, data = sal)
table1
##    salary
## sex  not placed placed
##   1          67    139
##   2          23     45
# Chi-squared test of independence on the sex x placement table built above.
chisq.test(table1)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table1
## X-squared = 0.0023919, df = 1, p-value = 0.961

placement doesn’t depend on sex

# Contingency table of age by placement status; printed for inspection.
table2 <- xtabs(~ age + salary, data = sal)
table2
##     salary
## age   not placed placed
##   22           1      1
##   23           3      5
##   24          13     20
##   25           9     44
##   26          10     30
##   27          14     32
##   28           6     15
##   29          11     11
##   30           2     10
##   31           2      8
##   32           5      3
##   33           0      1
##   34           3      1
##   35           3      0
##   36           2      0
##   37           1      0
##   39           1      1
##   40           0      2
##   42           1      0
##   43           2      0
##   48           1      0
# Chi-squared test of independence on the age x placement table built above.
# Many age rows in that table hold only a handful of observations, which is
# consistent with the approximation warning printed below.
chisq.test(table2)
## Warning in chisq.test(table2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  table2
## X-squared = 42.144, df = 20, p-value = 0.002649

placement depends on age

# Chi-squared test of placement status against gmat_tot; the two columns are
# passed directly, so chisq.test cross-tabulates them itself.
chisq.test(sal$salary,sal$gmat_tot)
## Warning in chisq.test(sal$salary, sal$gmat_tot): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  sal$salary and sal$gmat_tot
## X-squared = 33.128, df = 30, p-value = 0.317

placement doesn’t depend on gmat score

# Chi-squared test of placement status against work_yrs (columns passed
# directly rather than as a pre-built table).
chisq.test(sal$salary,sal$work_yrs)
## Warning in chisq.test(sal$salary, sal$work_yrs): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  sal$salary and sal$work_yrs
## X-squared = 35.953, df = 17, p-value = 0.004653

placement depends on work experience

# Contingency table of satisfaction score by placement status; printed for
# inspection.
table3 <- xtabs(~ satis + salary, data = sal)
table3
##      salary
## satis  not placed placed
##   1             0      1
##   2             0      1
##   3             0      5
##   4             4     13
##   5            36     38
##   6            40     57
##   7            10     23
##   998           0     46
# Chi-squared test of placement status against satis. This tests the same two
# variables tabulated in table3 above, but passes the raw columns instead of
# the table object.
chisq.test(sal$salary,sal$satis)
## Warning in chisq.test(sal$salary, sal$satis): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  sal$salary and sal$satis
## X-squared = 38.163, df = 7, p-value = 2.822e-06

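placement depends on satisfaction level (note that the satis = 998 group, with 46 rows that are all placed, looks like a non-response code and may be driving this result)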

# Contingency table of quarter by placement status; printed for inspection.
table4 <- xtabs(~ quarter + salary, data = sal)
table4
##        salary
## quarter  not placed placed
##       1          18     51
##       2          27     43
##       3          23     47
##       4          22     43
# Chi-squared test of placement status against gmat_tpc.
# NOTE(review): the quarter table (table4) printed just above is never passed
# to chisq.test; this call tests gmat_tpc instead -- confirm whether a test on
# quarter was also intended.
chisq.test(sal$salary,sal$gmat_tpc)
## Warning in chisq.test(sal$salary, sal$gmat_tpc): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  sal$salary and sal$gmat_tpc
## X-squared = 41.21, df = 41, p-value = 0.4614

placement doesn’t depend on gmat percentile

# Contingency table of first language by placement status; printed for
# inspection.
table5 <- xtabs(~ frstlang + salary, data = sal)
table5
##         salary
## frstlang  not placed placed
##        1          82    160
##        2           8     24
# Chi-squared test of independence on the frstlang x placement table built
# above.
chisq.test(table5)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table5
## X-squared = 0.64868, df = 1, p-value = 0.4206

placement doesn’t depend on language

logistic regression

library(Amelia)
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2017 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# Draw the missing-vs-observed map for `sal` using missmap() from the Amelia
# package loaded above.
missmap(sal, main = "Missing values vs observed")

# Build the modelling data set for the logistic regression.
# The previous call passed the second condition as subset()'s `select`
# argument, so it was never applied as a row filter; both codes are now
# checked inside the row condition itself.
# NOTE(review): `sal$salary` was recoded to "placed"/" not placed" earlier in
# this script, so no value equals "998"/"999" here and every row is kept.
# Reload the raw salaries first if those codes are meant to be excluded.
sal1 <- subset(sal, !(salary %in% c("998", "999")))

# Encode the response as 1 (placed) / 0 (not placed). The label case is
# handled explicitly so the result does not depend on how character strings
# collate against numbers; a still-numeric salary falls back to the > 0 rule.
placed <- if (is.character(sal1$salary)) {
  trimws(sal1$salary) == "placed"
} else {
  sal1$salary > 0
}
sal1$salary <- as.numeric(placed)

# Fixed, non-random split by row position: rows 1-128 train, rows 129-228 test.
train <- sal1[1:128, ]
test <- sal1[129:228, ]

# Logistic regression of placement on every other column of `train`.
x <- glm(salary ~ ., family = binomial(link = "logit"), data = train)
summary(x)
## 
## Call:
## glm(formula = salary ~ ., family = binomial(link = "logit"), 
##     data = train)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.00823  -1.01971   0.03027   0.90522   1.56385  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  1.073601   8.425423   0.127    0.899
## age         -0.158356   0.119846  -1.321    0.186
## sex          0.093349   0.476485   0.196    0.845
## gmat_tot    -0.000669   0.016705  -0.040    0.968
## gmat_qpc     0.010828   0.045364   0.239    0.811
## gmat_vpc     0.010181   0.043432   0.234    0.815
## gmat_tpc    -0.016722   0.032506  -0.514    0.607
## s_avg        1.519797   1.692086   0.898    0.369
## f_avg       -0.296257   0.417324  -0.710    0.478
## quarter     -0.702429   0.770050  -0.912    0.362
## work_yrs     0.038171   0.140488   0.272    0.786
## frstlang     0.425971   0.825812   0.516    0.606
## satis        0.007840   0.012721   0.616    0.538
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 165.99  on 127  degrees of freedom
## Residual deviance: 130.04  on 115  degrees of freedom
## AIC: 156.04
## 
## Number of Fisher Scoring iterations: 10
# Score the held-out rows: predicted probabilities are thresholded at 0.5 to
# obtain 0/1 class labels, which are then compared with the observed outcome.
predicted_prob <- predict(x, newdata = test, type = "response")
fitted.results <- ifelse(predicted_prob > 0.5, 1, 0)

# Share of test rows whose predicted label differs from the observed one;
# accuracy is reported as its complement.
misClasificError <- mean(fitted.results != test$salary)
print(paste("Accuracy", 1 - misClasificError))
## [1] "Accuracy 0.52"
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# ROC curve for the logistic model on the test rows: true-positive rate
# plotted against false-positive rate, built with ROCR.
p <- predict(x, newdata = test, type = "response")
pr <- prediction(predictions = p, labels = test$salary)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
plot(prf)