R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Read the dataset from a CSV file given by a relative path.
data3 <- read.csv("StartingSal.csv")
# psych is loaded for describe(), called below.
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# Print per-column descriptive statistics (n, mean, sd, median, range, skew, ...).
describe(data3)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

This is the summary of the data.

Description of data and its fields

##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0
## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...
# Histograms for the continuous / count variables.
hist(data3$age, breaks = 8, col = "green",
     main = "Age Distribution", xlab = "Age")

# NOTE(review): this uses every row of data3, including the salary values
# 0, 998 and 999 that the later analysis filters out -- confirm that is intended.
hist(data3$salary, breaks = 30, col = "red",
     main = "Salary wise distribution", xlab = "Salary")

hist(data3$gmat_tot, col = "blue",
     main = "GMAT Score", xlab = "Score in GMAT")

# sex, satis and quarter hold small integer codes. hist() uses right-closed
# bins with the lowest value folded into the first bin, so codes that sit on
# break points can end up sharing a bar (e.g. 1 and 2). Counting each code with
# table() and drawing one bar per code avoids that.
barplot(table(data3$sex), col = "green",
        main = "Gender", xlab = "Gender-wise distribution", ylab = "Frequency")

hist(data3$work_yrs, col = "red",
     main = "Work Experience Distribution", xlab = "Work Experience")

hist(data3$s_avg, col = "brown",
     main = "Spring score of students", xlab = "Spring Average")

# Keep only satisfaction scores up to 7; larger values (e.g. 998) are dropped.
newd <- data3[which(data3$satis <= 7), ]
barplot(table(newd$satis), col = "green",
        main = "Satisfaction with MBA Program", xlab = "Satisfaction",
        ylab = "Frequency")

barplot(table(data3$quarter), col = "blue",
        main = "Quartile Ranking", xlab = "Ranking", ylab = "Frequency")

# Subset used for all salary analyses: rows whose salary is not 0, 998 or 999.
# NOTE(review): presumably those three values are codes for "no salary
# reported" -- confirm against the data dictionary.
data4 <- data3[!is.na(data3$salary) & !(data3$salary %in% c(0, 998, 999)), ]

# Salary by sex. The title previously said "Salary vs Age" although the
# grouping variable is sex. With horizontal = TRUE the groups are on the y-axis
# and salary runs along the x-axis.
boxplot(salary ~ sex, data = data4, horizontal = TRUE,
        main = "Plot of Salary vs Sex", xlab = "Salary", ylab = "Sex")

# Salary by first language, same layout.
boxplot(salary ~ frstlang, data = data4, horizontal = TRUE,
        main = "Plot of Salary vs first language",
        xlab = "Salary", ylab = "FIRST LANGUAGE")

library(car)
## Warning: package 'car' was built under R version 3.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Same subset as used for the boxplots: rows whose salary is not 0, 998 or 999.
data4 <- data3[!is.na(data3$salary) & !(data3$salary %in% c(0, 998, 999)), ]

# Salary (y-axis) against each candidate predictor (x-axis).
scatterplot(salary ~ age, data = data4, xlab = "Age", ylab = "Salary")

# quarter is the quartile ranking, not a percentile.
scatterplot(salary ~ quarter, data = data4,
            xlab = "Quartile Ranking", ylab = "Salary")

scatterplot(salary ~ work_yrs, data = data4,
            xlab = "Work Experience", ylab = "Salary")

scatterplot(salary ~ gmat_tot, data = data4,
            xlab = "GMat Total", ylab = "Salary")

scatterplot(salary ~ f_avg, data = data4,
            xlab = "Fall Average", ylab = "Salary")

scatterplot(salary ~ s_avg, data = data4,
            xlab = "Spring Average", ylab = "Salary")

# Axis labels previously read "Age" / "Satisfaction"; the x variable is satis
# and the y variable is salary.
scatterplot(salary ~ satis, data = data4,
            xlab = "Satisfaction", ylab = "Salary")

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram over every column of data4: shaded cells in the lower triangle,
# pie glyphs in the upper triangle.
corrgram(
  data4,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrogram"
)

# Pairwise correlation matrix of all columns, rounded to two decimals.
round(cor(data4), digits = 2)
##            age   sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg
## age       1.00 -0.14    -0.08    -0.17     0.02    -0.10  0.16 -0.22
## sex      -0.14  1.00    -0.02    -0.15     0.05    -0.05  0.08  0.17
## gmat_tot -0.08 -0.02     1.00     0.67     0.78     0.97  0.17  0.12
## gmat_qpc -0.17 -0.15     0.67     1.00     0.09     0.66  0.02  0.10
## gmat_vpc  0.02  0.05     0.78     0.09     1.00     0.78  0.16  0.02
## gmat_tpc -0.10 -0.05     0.97     0.66     0.78     1.00  0.14  0.07
## s_avg     0.16  0.08     0.17     0.02     0.16     0.14  1.00  0.45
## f_avg    -0.22  0.17     0.12     0.10     0.02     0.07  0.45  1.00
## quarter  -0.13 -0.02    -0.11     0.01    -0.13    -0.10 -0.84 -0.43
## work_yrs  0.88 -0.09    -0.12    -0.18    -0.03    -0.13  0.16 -0.22
## frstlang  0.35  0.08    -0.13     0.01    -0.22    -0.16 -0.14 -0.05
## salary    0.50 -0.17    -0.09     0.01    -0.14    -0.13  0.10 -0.11
## satis     0.11 -0.09     0.06     0.00     0.15     0.12 -0.14 -0.12
##          quarter work_yrs frstlang salary satis
## age        -0.13     0.88     0.35   0.50  0.11
## sex        -0.02    -0.09     0.08  -0.17 -0.09
## gmat_tot   -0.11    -0.12    -0.13  -0.09  0.06
## gmat_qpc    0.01    -0.18     0.01   0.01  0.00
## gmat_vpc   -0.13    -0.03    -0.22  -0.14  0.15
## gmat_tpc   -0.10    -0.13    -0.16  -0.13  0.12
## s_avg      -0.84     0.16    -0.14   0.10 -0.14
## f_avg      -0.43    -0.22    -0.05  -0.11 -0.12
## quarter     1.00    -0.13     0.11  -0.13  0.23
## work_yrs   -0.13     1.00     0.20   0.45  0.06
## frstlang    0.11     0.20     1.00   0.27  0.09
## salary     -0.13     0.45     0.27   1.00 -0.04
## satis       0.23     0.06     0.09  -0.04  1.00
# Correlation of salary with every column of data4 (one row; includes salary itself).
cor(data4$salary,data4)
##            age        sex    gmat_tot  gmat_qpc   gmat_vpc   gmat_tpc
## [1,] 0.4996428 -0.1662887 -0.09067141 0.0141413 -0.1374323 -0.1320178
##          s_avg     f_avg    quarter  work_yrs  frstlang salary      satis
## [1,] 0.1017317 -0.106039 -0.1284853 0.4546663 0.2670195      1 -0.0400506
# Count the rows of data4 in each salary x sex cell, then display the table.
table1 <- xtabs(~ salary + sex, data = data4)
print(table1)
##         sex
## salary    1  2
##   64000   0  1
##   77000   1  0
##   78256   0  1
##   82000   0  1
##   85000   1  3
##   86000   0  2
##   88000   0  1
##   88500   1  0
##   90000   3  0
##   92000   2  1
##   93000   2  1
##   95000   4  3
##   96000   3  1
##   96500   1  0
##   97000   2  0
##   98000   6  4
##   99000   0  1
##   100000  4  5
##   100400  1  0
##   101000  0  2
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  2  0
##   105000 11  0
##   106000  2  1
##   107000  1  0
##   107300  1  0
##   107500  1  0
##   108000  2  0
##   110000  0  1
##   112000  3  0
##   115000  5  0
##   118000  1  0
##   120000  3  1
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1
# Count the rows of data4 in each salary x first-language cell, then display it.
table2 <- xtabs(~ salary + frstlang, data = data4)
print(table2)
##         frstlang
## salary    1  2
##   64000   1  0
##   77000   1  0
##   78256   1  0
##   82000   1  0
##   85000   4  0
##   86000   2  0
##   88000   1  0
##   88500   1  0
##   90000   3  0
##   92000   3  0
##   93000   3  0
##   95000   7  0
##   96000   4  0
##   96500   1  0
##   97000   2  0
##   98000   8  2
##   99000   0  1
##   100000  9  0
##   100400  1  0
##   101000  2  0
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  1  1
##   105000 11  0
##   106000  3  0
##   107000  1  0
##   107300  0  1
##   107500  1  0
##   108000  2  0
##   110000  1  0
##   112000  3  0
##   115000  5  0
##   118000  0  1
##   120000  4  0
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1
# Count the rows of data4 in each salary x work-years cell, then display it.
table3 <- xtabs(~ salary + work_yrs, data = data4)
print(table3)
##         work_yrs
## salary   0 1 2 3 4 5 6 7 8 10 15 16
##   64000  0 0 1 0 0 0 0 0 0  0  0  0
##   77000  0 0 1 0 0 0 0 0 0  0  0  0
##   78256  0 1 0 0 0 0 0 0 0  0  0  0
##   82000  0 1 0 0 0 0 0 0 0  0  0  0
##   85000  0 1 2 1 0 0 0 0 0  0  0  0
##   86000  0 0 1 1 0 0 0 0 0  0  0  0
##   88000  0 0 0 1 0 0 0 0 0  0  0  0
##   88500  0 0 0 1 0 0 0 0 0  0  0  0
##   90000  0 0 2 0 0 1 0 0 0  0  0  0
##   92000  0 0 3 0 0 0 0 0 0  0  0  0
##   93000  0 0 0 0 1 1 0 0 1  0  0  0
##   95000  1 1 2 2 0 1 0 0 0  0  0  0
##   96000  0 1 2 0 1 0 0 0 0  0  0  0
##   96500  0 0 1 0 0 0 0 0 0  0  0  0
##   97000  0 0 0 1 1 0 0 0 0  0  0  0
##   98000  0 0 7 1 1 0 0 1 0  0  0  0
##   99000  0 0 0 0 0 1 0 0 0  0  0  0
##   100000 0 0 6 1 1 0 1 0 0  0  0  0
##   100400 0 0 0 1 0 0 0 0 0  0  0  0
##   101000 0 0 2 0 0 0 0 0 0  0  0  0
##   101100 0 0 0 0 0 0 0 0 1  0  0  0
##   101600 0 0 0 1 0 0 0 0 0  0  0  0
##   102500 0 0 0 0 0 0 1 0 0  0  0  0
##   103000 0 0 0 1 0 0 0 0 0  0  0  0
##   104000 0 0 0 0 2 0 0 0 0  0  0  0
##   105000 0 0 4 4 0 1 1 0 0  0  0  1
##   106000 0 0 0 0 0 0 2 0 1  0  0  0
##   107000 0 0 1 0 0 0 0 0 0  0  0  0
##   107300 0 0 1 0 0 0 0 0 0  0  0  0
##   107500 0 0 0 1 0 0 0 0 0  0  0  0
##   108000 0 0 0 1 1 0 0 0 0  0  0  0
##   110000 0 0 0 0 0 0 1 0 0  0  0  0
##   112000 0 0 1 0 0 0 1 0 0  0  0  1
##   115000 0 2 0 1 2 0 0 0 0  0  0  0
##   118000 0 0 0 0 0 0 0 0 0  1  0  0
##   120000 0 0 0 1 0 2 0 0 1  0  0  0
##   126710 0 0 0 1 0 0 0 0 0  0  0  0
##   130000 0 0 0 0 1 0 0 0 0  0  0  0
##   145800 0 0 1 0 0 0 0 0 0  0  0  0
##   146000 0 0 0 0 0 0 0 0 0  0  1  0
##   162000 0 1 0 0 0 0 0 0 0  0  0  0
##   220000 0 0 0 0 0 0 0 0 0  0  1  0
# Two-sample t-test of mean salary between the two sex groups,
# assuming equal variances in both groups (var.equal=TRUE).
t.test(salary~sex,data = data4,var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  salary by sex
## t = 1.6948, df = 101, p-value = 0.0932
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1099.123 13992.293
## sample estimates:
## mean in group 1 mean in group 2 
##       104970.97        98524.39
# Chi-squared test on the salary x first-language table.
# NOTE(review): most cells of table2 hold 0 or 1, and R warns that the
# approximation may be incorrect -- treat the p-value with caution.
chisq.test(table2)
## Warning in chisq.test(table2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  table2
## X-squared = 69.847, df = 41, p-value = 0.003296
# Chi-squared test on the salary x work-years table.
# NOTE(review): most cells of table3 hold 0 or 1, and R warns that the
# approximation may be incorrect -- treat the p-value with caution.
chisq.test(table3)
## Warning in chisq.test(table3): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  table3
## X-squared = 535.23, df = 451, p-value = 0.003809
# Pearson correlation test between salary and gmat_tot within data4.
cor.test(data4$salary,data4$gmat_tot)
## 
##  Pearson's product-moment correlation
## 
## data:  data4$salary and data4$gmat_tot
## t = -0.91501, df = 101, p-value = 0.3624
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2792952  0.1046903
## sample estimates:
##         cor 
## -0.09067141
# Pearson correlation test between salary and s_avg within data4.
cor.test(data4$salary,data4$s_avg)
## 
##  Pearson's product-moment correlation
## 
## data:  data4$salary and data4$s_avg
## t = 1.0277, df = 101, p-value = 0.3065
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.09363639  0.28955576
## sample estimates:
##       cor 
## 0.1017317
# Pearson correlation test between salary and satis within data4.
cor.test(data4$salary,data4$satis)
## 
##  Pearson's product-moment correlation
## 
## data:  data4$salary and data4$satis
## t = -0.40283, df = 101, p-value = 0.6879
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2317788  0.1546729
## sample estimates:
##        cor 
## -0.0400506
# Pearson correlation test between salary and age within data4.
cor.test(data4$salary,data4$age)
## 
##  Pearson's product-moment correlation
## 
## data:  data4$salary and data4$age
## t = 5.7968, df = 101, p-value = 7.748e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3388862 0.6320523
## sample estimates:
##       cor 
## 0.4996428
# Chi-squared test on the cross-tabulation of gmat_tot against gmat_tpc.
# NOTE(review): every distinct score becomes its own category, giving a very
# sparse table; R warns that the approximation may be incorrect.
chisq.test(data4$gmat_tot,data4$gmat_tpc)
## Warning in chisq.test(data4$gmat_tot, data4$gmat_tpc): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  data4$gmat_tot and data4$gmat_tpc
## X-squared = 1651.9, df = 600, p-value < 2.2e-16
# Chi-squared test on the cross-tabulation of salary against quarter.
# NOTE(review): every distinct salary becomes its own category, giving a very
# sparse table; R warns that the approximation may be incorrect.
chisq.test(data4$salary,data4$quarter)
## Warning in chisq.test(data4$salary, data4$quarter): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  data4$salary and data4$quarter
## X-squared = 129.85, df = 123, p-value = 0.3186
# Backward elimination, step 1: full model with all nine candidate predictors.
m1 <- lm(
  formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
    s_avg + f_avg + quarter + satis,
  data = data4
)
summary(m1)
## 
## Call:
## lm(formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang + 
##     s_avg + f_avg + quarter + satis, data = data4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24034  -8529  -1589   5875  80478 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 78541.32   41033.00   1.914   0.0587 .
## sex         -4956.56    3544.15  -1.399   0.1653  
## age          1637.25    1129.35   1.450   0.1505  
## work_yrs      792.84    1150.00   0.689   0.4923  
## gmat_tot      -11.17      32.05  -0.349   0.7282  
## frstlang    11069.51    7150.49   1.548   0.1250  
## s_avg        -779.00    8077.43  -0.096   0.9234  
## f_avg        -958.56    3869.78  -0.248   0.8049  
## quarter     -1633.49    2657.39  -0.615   0.5403  
## satis       -1987.24    2084.85  -0.953   0.3430  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15720 on 93 degrees of freedom
## Multiple R-squared:  0.2942, Adjusted R-squared:  0.2259 
## F-statistic: 4.308 on 9 and 93 DF,  p-value: 0.0001058
# Backward elimination, step 2: s_avg removed from the predictor set.
m2 <- lm(
  formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
    f_avg + quarter + satis,
  data = data4
)
summary(m2)
## 
## Call:
## lm(formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang + 
##     f_avg + quarter + satis, data = data4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24686  -8560  -1526   5832  80612 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 76486.27   34880.98   2.193   0.0308 *
## sex         -5003.41    3492.15  -1.433   0.1552  
## age          1627.90    1119.24   1.454   0.1491  
## work_yrs      789.59    1143.43   0.691   0.4916  
## gmat_tot      -11.61      31.56  -0.368   0.7138  
## frstlang    11174.82    7029.28   1.590   0.1152  
## f_avg       -1035.59    3766.44  -0.275   0.7840  
## quarter     -1434.10    1660.78  -0.864   0.3901  
## satis       -2001.75    2068.42  -0.968   0.3356  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15640 on 94 degrees of freedom
## Multiple R-squared:  0.2942, Adjusted R-squared:  0.2341 
## F-statistic: 4.897 on 8 and 94 DF,  p-value: 4.506e-05
# Backward elimination, step 3: f_avg removed from the predictor set.
m3 <- lm(
  formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
    quarter + satis,
  data = data4
)
summary(m3)
## 
## Call:
## lm(formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang + 
##     quarter + satis, data = data4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -25054  -8678  -1597   5943  80180 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 72694.79   31883.25   2.280   0.0248 *
## sex         -5128.21    3445.64  -1.488   0.1400  
## age          1657.58    1108.59   1.495   0.1382  
## work_yrs      807.86    1135.93   0.711   0.4787  
## gmat_tot      -12.18      31.34  -0.388   0.6985  
## frstlang    10999.97    6966.32   1.579   0.1177  
## quarter     -1220.16    1460.07  -0.836   0.4054  
## satis       -2011.88    2058.01  -0.978   0.3308  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15560 on 95 degrees of freedom
## Multiple R-squared:  0.2936, Adjusted R-squared:  0.2415 
## F-statistic:  5.64 on 7 and 95 DF,  p-value: 1.834e-05
# Backward elimination, step 4: gmat_tot removed from the predictor set.
m4 <- lm(
  formula = salary ~ sex + age + work_yrs + frstlang + quarter + satis,
  data = data4
)
summary(m4)
## 
## Call:
## lm(formula = salary ~ sex + age + work_yrs + frstlang + quarter + 
##     satis, data = data4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24386  -8915  -1714   6283  80982 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  65982.8    26677.0   2.473   0.0151 *
## sex          -5128.3     3430.4  -1.495   0.1382  
## age           1617.6     1098.9   1.472   0.1443  
## work_yrs       870.2     1119.5   0.777   0.4389  
## frstlang     11345.3     6878.7   1.649   0.1023  
## quarter      -1150.8     1442.7  -0.798   0.4270  
## satis        -2092.2     2038.5  -1.026   0.3073  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15490 on 96 degrees of freedom
## Multiple R-squared:  0.2925, Adjusted R-squared:  0.2482 
## F-statistic: 6.614 on 6 and 96 DF,  p-value: 7.096e-06
# Backward elimination, step 5: work_yrs removed from the predictor set.
m5 <- lm(
  formula = salary ~ sex + age + frstlang + quarter + satis,
  data = data4
)
summary(m5)
## 
## Call:
## lm(formula = salary ~ sex + age + frstlang + quarter + satis, 
##     data = data4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24008  -9061  -1996   6363  81963 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  50594.0    17844.4   2.835  0.00557 ** 
## sex          -4840.5     3403.4  -1.422  0.15815    
## age           2369.6      520.1   4.556 1.52e-05 ***
## frstlang      9942.1     6624.1   1.501  0.13663    
## quarter      -1126.3     1439.4  -0.783  0.43582    
## satis        -2173.4     2031.7  -1.070  0.28739    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15460 on 97 degrees of freedom
## Multiple R-squared:  0.288,  Adjusted R-squared:  0.2513 
## F-statistic: 7.848 on 5 and 97 DF,  p-value: 3.074e-06
# Backward elimination, step 6: quarter removed from the predictor set.
# Stored as m6: this step previously reused the name m1, which overwrote the
# full model fitted in step 1 and broke the m1..m8 sequence.
m6 <- lm(salary ~ sex + age + frstlang + satis, data = data4)
summary(m6)
## 
## Call:
## lm(formula = salary ~ sex + age + frstlang + satis, data = data4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -25463  -9177  -1636   5686  79645 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  48730.8    17649.8   2.761  0.00688 ** 
## sex          -4720.6     3393.2  -1.391  0.16732    
## age           2452.8      508.1   4.827  5.1e-06 ***
## frstlang      9105.5     6524.3   1.396  0.16598    
## satis        -2542.7     1972.2  -1.289  0.20034    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15430 on 98 degrees of freedom
## Multiple R-squared:  0.2835, Adjusted R-squared:  0.2543 
## F-statistic: 9.695 on 4 and 98 DF,  p-value: 1.197e-06
# Backward elimination, step 7: satis removed from the predictor set.
m7 <- lm(
  formula = salary ~ sex + age + frstlang,
  data = data4
)
summary(m7)
## 
## Call:
## lm(formula = salary ~ sex + age + frstlang, data = data4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28723  -9214  -1296   5524  80180 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  35035.9    14142.6   2.477   0.0149 *  
## sex          -4343.6     3391.8  -1.281   0.2033    
## age           2409.7      508.7   4.737 7.26e-06 ***
## frstlang      8541.4     6531.3   1.308   0.1940    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15480 on 99 degrees of freedom
## Multiple R-squared:  0.2714, Adjusted R-squared:  0.2493 
## F-statistic: 12.29 on 3 and 99 DF,  p-value: 6.687e-07
# Backward elimination, step 8: sex removed; age and frstlang remain.
m8 <- lm(
  formula = salary ~ age + frstlang,
  data = data4
)
summary(m8)
## 
## Call:
## lm(formula = salary ~ age + frstlang, data = data4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31507  -8412  -2035   4493  76632 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  27421.3    12872.8   2.130   0.0356 *  
## age           2528.2      501.8   5.038 2.09e-06 ***
## frstlang      7409.9     6492.0   1.141   0.2564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15530 on 100 degrees of freedom
## Multiple R-squared:  0.2593, Adjusted R-squared:  0.2445 
## F-statistic:  17.5 on 2 and 100 DF,  p-value: 3.036e-07

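So the final model found by backward elimination retains Age and First Language as predictors of salary. Of these, only Age is statistically significant (p < 0.001); First Language is not (p ≈ 0.26).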

# Rows of data3 whose recorded salary is 0.
# NOTE(review): presumably these are the students without a job -- confirm.
# salary is an integer column, so compare with the number 0, not the string "0".
nojob <- data3[which(data3$salary == 0), ]

# chisq.test(x, y) requires x and y to have the same length, so data4 is cut
# down to its first nrow(nojob) rows (previously a hard-coded 1:90).
stopifnot(nrow(data4) >= nrow(nojob))
data4 <- data4[seq_len(nrow(nojob)), ]

# NOTE(review): chisq.test(x, y) cross-tabulates the two vectors element by
# element. Here row i of data4 and row i of nojob are different students, so
# the pairing is arbitrary. A comparison of the two groups would normally
# tabulate group membership against the variable instead -- confirm the
# intended analysis before relying on these p-values.
chisq.test(data4$age, nojob$age)
## Warning in chisq.test(data4$age, nojob$age): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  data4$age and nojob$age
## X-squared = 229.27, df = 252, p-value = 0.8449
# Chi-squared test on sex in data4 cross-tabulated against sex in nojob.
# NOTE(review): the two vectors come from different rows of data3 and are
# paired only by position -- confirm this answers the intended question.
chisq.test(data4$sex,nojob$sex)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data4$sex and nojob$sex
## X-squared = 0.11711, df = 1, p-value = 0.7322
# Chi-squared test on work_yrs in data4 cross-tabulated against work_yrs in nojob.
# NOTE(review): the two vectors come from different rows of data3 and are
# paired only by position -- confirm this answers the intended question.
chisq.test(data4$work_yrs,nojob$work_yrs)
## Warning in chisq.test(data4$work_yrs, nojob$work_yrs): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  data4$work_yrs and nojob$work_yrs
## X-squared = 117.66, df = 176, p-value = 0.9998
# Chi-squared test on quarter in data4 cross-tabulated against quarter in nojob.
# NOTE(review): the two vectors come from different rows of data3 and are
# paired only by position, so a small p-value may reflect how the rows are
# ordered in the file rather than a group difference -- confirm.
chisq.test(data4$quarter,nojob$quarter)
## Warning in chisq.test(data4$quarter, nojob$quarter): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  data4$quarter and nojob$quarter
## X-squared = 110.98, df = 9, p-value < 2.2e-16
# Chi-squared test on gmat_tot in data4 cross-tabulated against gmat_tot in nojob.
# NOTE(review): the two vectors come from different rows of data3 and are
# paired only by position -- confirm this answers the intended question.
chisq.test(data4$gmat_tot,nojob$gmat_tot)
## Warning in chisq.test(data4$gmat_tot, nojob$gmat_tot): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  data4$gmat_tot and nojob$gmat_tot
## X-squared = 496.72, df = 500, p-value = 0.533

So, based on these tests, the relevant factor in getting a job is the quartile ranking (the only comparison with p < 0.05).