Reading data into R

# Load the MBA starting-salaries data from the working directory.
mba.df <- read.csv("MBA Starting Salaries Data.csv")

# View() opens a spreadsheet-style viewer, so only call it when a person is
# actually at the console; batch runs skip it.
if (interactive()) {
  View(mba.df)
}

# NOTE(review): 998 and 999 look like survey placeholder codes rather than
# real values (salary median is 999, satis maximum is 998) -- confirm against
# the data dictionary.
# mba1.df: rows whose satis is not 998 and whose salary is not 999
# (salary 0 rows are kept).
mba1.df <- mba.df[which(mba.df$satis != 998 & mba.df$salary != 999), ]
# mba2.df: rows whose salary is none of 998, 999 or 0.
mba2.df <- mba.df[
  which(mba.df$salary != 998 & mba.df$salary != 999 & mba.df$salary != 0),
]

Summary statistics for the important variables in the dataset.

# Five-number summaries (plus mean) for columns 1-5 and 9-12 of the summary
# table: age, sex, gmat_tot, gmat_qpc, gmat_vpc, quarter, work_yrs, frstlang
# and salary.
summary(mba.df)[, c(seq(1, 5), seq(9, 12))]
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        quarter         work_yrs         frstlang    
##  Min.   :16.00   Min.   :1.000   Min.   : 0.000   Min.   :1.000  
##  1st Qu.:71.00   1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000  
##  Median :81.00   Median :2.000   Median : 3.000   Median :1.000  
##  Mean   :78.32   Mean   :2.478   Mean   : 3.872   Mean   :1.117  
##  3rd Qu.:91.00   3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000  
##  Max.   :99.00   Max.   :4.000   Max.   :22.000   Max.   :2.000  
##      salary      
##  Min.   :     0  
##  1st Qu.:     0  
##  Median :   999  
##  Mean   : 39026  
##  3rd Qu.: 97000  
##  Max.   :220000
library(psych)
# psych::describe() returns one row per variable; the index below selects
# *statistic* columns (vars, n, mean, sd, median, max, range, skew, kurtosis),
# so every variable still appears in the printed result.
describe(mba.df)[, c(seq(1, 5), seq(9, 12))]
##          vars   n     mean       sd median    max  range  skew kurtosis
## age         1 274    27.36     3.71     27     48     26  2.16     6.45
## sex         2 274     1.25     0.43      1      2      1  1.16    -0.66
## gmat_tot    3 274   619.45    57.54    620    790    340 -0.01     0.06
## gmat_qpc    4 274    80.64    14.87     83     99     71 -0.92     0.30
## gmat_vpc    5 274    78.32    16.86     81     99     83 -1.04     0.74
## gmat_tpc    6 274    84.20    14.02     87     99     99 -2.28     9.02
## s_avg       7 274     3.03     0.38      3      4      2 -0.06    -0.38
## f_avg       8 274     3.06     0.53      3      4      4 -2.08    10.85
## quarter     9 274     2.48     1.11      2      4      3  0.02    -1.35
## work_yrs   10 274     3.87     3.23      3     22     22  2.78     9.80
## frstlang   11 274     1.12     0.32      1      2      1  2.37     3.65
## salary     12 274 39025.69 50951.56    999 220000 220000  0.70    -1.05
## satis      13 274   172.18   371.61      6    998    997  1.77     1.13

Histograms / Bar Plots to visualize the distribution of each variable independently.

# Histogram of the age column.
hist(
  mba.df$age,
  col = "grey",
  main = "Age distribution",
  xlab = "age",
  ylab = "Frequencies"
)

# Recode sex (1/2) as a labelled factor in mba.df, then plot the factor.
# mba1.df and mba2.df were built before this line, so they keep the numeric
# 1/2 coding.
mba.df$sex <- factor(
  mba.df$sex,
  levels = c(1, 2),
  labels = c("Male", "Female")
)
plot(mba.df$sex, col = "green", main = "Gender distribution")

# Histogram of total GMAT score, requesting 30 breaks.
hist(
  mba.df$gmat_tot,
  breaks = 30,
  col = "grey",
  main = "Gmat score distribution",
  xlab = "scores",
  ylab = "Frequencies"
)

# Recode frstlang (1/2) as a labelled factor in mba.df, then plot the factor.
# mba1.df and mba2.df were built before this line, so they keep the numeric
# 1/2 coding.
mba.df$frstlang <- factor(
  mba.df$frstlang,
  levels = c(1, 2),
  labels = c("English", "other")
)
plot(mba.df$frstlang, col = "grey", main = "language distribution")

# Histogram of years of work experience, requesting 30 breaks.
hist(
  mba.df$work_yrs,
  breaks = 30,
  col = "grey",
  main = "work experience distribution",
  xlab = "experience years",
  ylab = "Frequencies"
)

# Keep only rows whose satisfaction rating is at most 7 (this drops the 998
# placeholder rows) and plot the distribution of the remaining ratings.
# The threshold is numeric: comparing a number with the string '7' makes R
# compare both sides as text, which only works by accident for these values.
rating <- mba.df[which(mba.df$satis <= 7), ]
hist(
  rating$satis,
  main = "Satisfaction distribution",
  xlab = "Rating (1=low,7=high)",
  ylab = "Frequencies",
  col = "grey"
)

# Drop rows whose salary is one of the placeholder codes 998 / 999 and plot
# what is left (salary 0 rows are kept, as before).
# The codes are compared as numbers rather than strings so the filter does not
# depend on how R formats numbers as text.
salary.df <- mba.df[which(mba.df$salary != 998 & mba.df$salary != 999), ]
hist(
  salary.df$salary,
  main = "Salary distribution",
  xlab = "Salary",
  ylab = "Frequencies",
  col = "grey"
)

Salary comparison to other variables

Salary and sex

library(lattice)
# Horizontal box plots of salary, one box per sex code, using mba2.df.
boxplot(
  salary ~ sex,
  data = mba2.df,
  horizontal = TRUE,
  xlab = "Salary",
  ylab = "1 = Male; 2 = Female"
)

Salary and age

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# car::scatterplot of salary against age using mba2.df.
scatterplot(
  mba2.df$age,
  mba2.df$salary,
  main = "salary vs age",
  xlab = "age",
  ylab = "salary"
)

Salary and work-experience

# car::scatterplot of salary against years of work experience.
# Uses mba2.df (salary 0 rows removed) so the data matches the axis label
# "those who got jobs"; mba1.df still contains the salary 0 rows.
scatterplot(
  mba2.df$work_yrs,
  mba2.df$salary,
  xlab = "Work experience (years) of those who got jobs",
  ylab = "Salary",
  main = "Salary vs Work Experience"
)

Salary and language

# Box plots of salary grouped by first-language code (1/2 in mba2.df).
# The formula interface is required here: passing the two columns as separate
# positional arguments draws one box for the language codes and one for the
# salaries instead of salary split by language.
boxplot(
  salary ~ frstlang,
  data = mba2.df,
  xlab = "1 = English Speaking   2 = others",
  ylab = "Salary",
  main = "Salary & language"
)

Corrgram

library(corrgram)
# Correlogram of the columns of mba2.df in their original order:
# shaded cells below the diagonal, pie glyphs above, min/max on the diagonal.
corrgram(
  mba2.df,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of mba starting salaries"
)

TASK 1b: WHO GOT HOW MUCH SALARY?

Tables

Salary & Sex

# Count of students for every (salary, sex) combination in mba1.df.
table(mba1.df[["salary"]], mba1.df[["sex"]])
##         
##           1  2
##   0      67 23
##   64000   0  1
##   77000   1  0
##   78256   0  1
##   82000   0  1
##   85000   1  3
##   86000   0  2
##   88000   0  1
##   88500   1  0
##   90000   3  0
##   92000   2  1
##   93000   2  1
##   95000   4  3
##   96000   3  1
##   96500   1  0
##   97000   2  0
##   98000   6  4
##   99000   0  1
##   100000  4  5
##   100400  1  0
##   101000  0  2
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  2  0
##   105000 11  0
##   106000  2  1
##   107000  1  0
##   107300  1  0
##   107500  1  0
##   108000  2  0
##   110000  0  1
##   112000  3  0
##   115000  5  0
##   118000  1  0
##   120000  3  1
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1

Salary & age

# Count of students for every (salary, age) combination in mba1.df.
table(mba1.df[["salary"]], mba1.df[["age"]])
##         
##          22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 39 40 42 43 48
##   0       1  3 13  9 10 14  6 11  2  2  5  0  3  3  2  1  1  0  1  2  1
##   64000   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   77000   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   78256   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   85000   1  0  0  1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   86000   0  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   88000   0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   88500   0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   90000   0  0  0  2  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   92000   0  0  0  2  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   93000   0  0  0  1  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   95000   0  0  1  5  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   96000   0  0  1  1  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   96500   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   98000   0  1  3  2  1  1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   99000   0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   100000  0  1  4  1  1  1  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   100400  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101000  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101600  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   103000  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   105000  0  1  1  2  3  1  0  0  1  1  0  0  1  0  0  0  0  0  0  0  0
##   106000  0  0  0  0  0  0  0  1  2  0  0  0  0  0  0  0  0  0  0  0  0
##   107000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   107300  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##   107500  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   110000  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   112000  0  0  1  0  0  0  0  1  0  0  0  0  0  0  0  0  1  0  0  0  0
##   115000  0  0  1  1  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   118000  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   120000  0  0  0  0  0  1  1  0  2  0  0  0  0  0  0  0  0  0  0  0  0
##   126710  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   145800  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0
##   162000  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   220000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0

salary & gmat score

# Count of students for every (salary, total GMAT score) combination in
# mba1.df.
table(mba1.df[["salary"]], mba1.df[["gmat_tot"]])
##         
##          450 480 500 510 520 530 540 550 560 570 580 590 600 610 620 630
##   0        1   1   0   2   0   3   3   4   8   7   4   3   3   9   4   5
##   64000    0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0
##   77000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   78256    0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   85000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   86000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   88000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   88500    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   90000    0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   1
##   92000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   93000    0   0   0   0   0   0   1   0   0   0   0   0   0   1   1   0
##   95000    0   0   0   0   0   1   0   0   2   0   0   0   0   2   0   0
##   96000    0   0   0   0   0   0   0   0   1   0   0   1   1   0   0   0
##   96500    0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   0   0   0   1   0   0   0   1   0
##   98000    0   0   0   0   0   0   0   0   1   3   1   1   0   1   0   0
##   99000    0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0
##   100000   0   0   0   0   0   0   0   0   2   0   1   0   1   1   0   1
##   100400   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   101000   0   0   0   0   0   0   0   0   0   0   0   0   1   0   1   0
##   101100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   101600   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   102500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   104000   0   0   0   0   0   1   0   0   1   0   0   0   0   0   0   0
##   105000   0   0   0   0   0   0   0   2   0   2   3   0   1   0   1   0
##   106000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   107000   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   107300   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   107500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   108000   0   0   0   0   0   0   0   0   0   1   0   0   1   0   0   0
##   110000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   112000   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   115000   0   0   0   0   0   0   1   0   0   1   0   0   0   0   1   1
##   118000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   120000   0   0   0   0   0   0   0   0   0   0   0   0   2   0   0   0
##   126710   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
##   130000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   145800   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   146000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   162000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   220000   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0
##         
##          640 650 660 670 680 700 710 720 730 740 750 760
##   0        6   5   3   4   3   2   4   2   1   1   1   1
##   64000    0   0   0   0   0   0   0   0   0   0   0   0
##   77000    0   0   1   0   0   0   0   0   0   0   0   0
##   78256    0   0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   0   1   0   0   0   0   0   0   0   0
##   85000    0   0   1   0   0   1   0   1   0   0   0   0
##   86000    0   0   0   0   1   0   0   0   0   0   0   0
##   88000    0   1   0   0   0   0   0   0   0   0   0   0
##   88500    0   0   0   0   0   0   0   0   0   0   0   0
##   90000    0   1   0   0   0   0   0   0   0   0   0   0
##   92000    0   0   1   0   0   0   1   0   0   0   0   0
##   93000    0   0   0   0   0   0   0   0   0   0   0   0
##   95000    0   0   0   2   0   0   0   0   0   0   0   0
##   96000    0   1   0   0   0   0   0   0   0   0   0   0
##   96500    0   0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   0   0   0   0   0
##   98000    0   0   0   1   1   0   1   0   0   0   0   0
##   99000    0   0   0   0   0   0   0   0   0   0   0   0
##   100000   0   2   0   0   0   0   1   0   0   0   0   0
##   100400   0   0   0   0   0   0   0   0   0   0   0   0
##   101000   0   0   0   0   0   0   0   0   0   0   0   0
##   101100   0   0   1   0   0   0   0   0   0   0   0   0
##   101600   0   0   0   0   0   0   0   0   0   0   0   0
##   102500   0   0   0   1   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0   0
##   104000   0   0   0   0   0   0   0   0   0   0   0   0
##   105000   0   1   0   0   1   0   0   0   0   0   0   0
##   106000   0   0   0   0   2   0   0   0   0   0   0   0
##   107000   0   0   0   0   0   0   0   0   0   0   0   0
##   107300   0   0   1   0   0   0   0   0   0   0   0   0
##   107500   0   0   0   0   0   0   0   0   0   0   0   0
##   108000   0   0   0   0   0   0   0   0   0   0   0   0
##   110000   1   0   0   0   0   0   0   0   0   0   0   0
##   112000   0   0   0   1   1   0   0   0   0   0   0   0
##   115000   0   0   0   0   0   0   1   0   0   0   0   0
##   118000   0   0   0   0   0   0   0   0   0   0   0   0
##   120000   0   0   0   1   0   1   0   0   0   0   0   0
##   126710   0   0   0   0   0   0   0   0   0   0   0   0
##   130000   0   1   0   0   0   0   0   0   0   0   0   0
##   145800   0   0   0   0   0   0   0   0   0   0   0   0
##   146000   0   0   0   0   0   0   0   0   0   0   0   0
##   162000   0   0   0   0   0   1   0   0   0   0   0   0
##   220000   0   0   0   0   0   0   0   0   0   0   0   0

Salary & Language

# Count of students for every (salary, first-language code) combination in
# mba1.df.
table(mba1.df[["salary"]], mba1.df[["frstlang"]])
##         
##           1  2
##   0      82  8
##   64000   1  0
##   77000   1  0
##   78256   1  0
##   82000   1  0
##   85000   4  0
##   86000   2  0
##   88000   1  0
##   88500   1  0
##   90000   3  0
##   92000   3  0
##   93000   3  0
##   95000   7  0
##   96000   4  0
##   96500   1  0
##   97000   2  0
##   98000   8  2
##   99000   0  1
##   100000  9  0
##   100400  1  0
##   101000  2  0
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  1  1
##   105000 11  0
##   106000  3  0
##   107000  1  0
##   107300  0  1
##   107500  1  0
##   108000  2  0
##   110000  1  0
##   112000  3  0
##   115000  5  0
##   118000  0  1
##   120000  4  0
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1

Salary and Work-experience

# Count of students for every (salary, years of work experience) combination
# in mba1.df.
table(mba1.df[["salary"]], mba1.df[["work_yrs"]])
##         
##           0  1  2  3  4  5  6  7  8  9 10 11 12 13 15 16 18 22
##   0       1 12 22 14  9 12  2  5  2  1  1  2  2  1  0  1  1  2
##   64000   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   77000   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   78256   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   82000   0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   85000   0  1  2  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   86000   0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   88000   0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   88500   0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   90000   0  0  2  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   92000   0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   93000   0  0  0  0  1  1  0  0  1  0  0  0  0  0  0  0  0  0
##   95000   1  1  2  2  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   96000   0  1  2  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   96500   0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   97000   0  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   98000   0  0  7  1  1  0  0  1  0  0  0  0  0  0  0  0  0  0
##   99000   0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##   100000  0  0  6  1  1  0  1  0  0  0  0  0  0  0  0  0  0  0
##   100400  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101000  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   101100  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   101600  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   102500  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   103000  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   104000  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0
##   105000  0  0  4  4  0  1  1  0  0  0  0  0  0  0  0  1  0  0
##   106000  0  0  0  0  0  0  2  0  1  0  0  0  0  0  0  0  0  0
##   107000  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   107300  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   107500  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   108000  0  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   110000  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##   112000  0  0  1  0  0  0  1  0  0  0  0  0  0  0  0  1  0  0
##   115000  0  2  0  1  2  0  0  0  0  0  0  0  0  0  0  0  0  0
##   118000  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##   120000  0  0  0  1  0  2  0  0  1  0  0  0  0  0  0  0  0  0
##   126710  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   130000  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
##   145800  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   146000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0
##   162000  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   220000  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0

Chi Square Test

Evaluating whether male students receive higher salaries than female students.
# Contingency table of sex by salary (each distinct salary is its own column),
# followed by a Pearson chi-squared test on that table.
# NOTE(review): with one column per distinct salary most cells hold 0 or 1
# observations, which is why chisq.test() warns that the approximation may be
# incorrect.
task.a <- xtabs(formula = ~ sex + salary, data = mba1.df)
chisq.test(task.a)
## Warning in chisq.test(task.a): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  task.a
## X-squared = 55.494, df = 42, p-value = 0.07929

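As the p-value (0.079) is greater than 0.05, we fail to reject the null hypothesis of independence: this test gives no evidence of an association between sex and salary. Note the warning above, however: the chi-squared approximation may be unreliable for this table.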

Evaluating whether those having English as their first language receive higher salaries.
# Contingency table of first-language code by salary (each distinct salary is
# its own column), followed by a Pearson chi-squared test on that table.
# NOTE(review): with one column per distinct salary most cells hold 0 or 1
# observations, which is why chisq.test() warns that the approximation may be
# incorrect.
task.b <- xtabs(formula = ~ frstlang + salary, data = mba1.df)
chisq.test(task.b)
## Warning in chisq.test(task.b): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  task.b
## X-squared = 62.016, df = 42, p-value = 0.02384

Regression Models

# Turn the two coded columns of mba1.df into factors so that lm() treats them
# as categorical predictors, then print the resulting structure.
mba1.df[c("frstlang", "sex")] <- lapply(
  mba1.df[c("frstlang", "sex")],
  as.factor
)
str(mba1.df)
## 'data.frame':    193 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 25 25 27 27 28 ...
##  $ sex     : Factor w/ 2 levels "1","2": 2 1 1 1 1 1 2 1 1 2 ...
##  $ gmat_tot: int  620 610 670 570 640 610 650 740 750 540 ...
##  $ gmat_qpc: int  77 90 99 56 82 89 88 99 99 75 ...
##  $ gmat_vpc: int  87 71 78 81 89 74 89 96 98 50 ...
##  $ gmat_tpc: int  87 87 95 75 91 87 92 99 99 65 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.9 3.4 3.3 3.5 3.4 3.6 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.5 3.75 3.5 3.5 4 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 3 1 5 ...
##  $ frstlang: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ satis   : int  7 6 6 7 6 5 6 6 5 5 ...
# Linear model of salary on sex, fitted to mba1.df.
# NOTE(review): mba1.df still contains the salary 0 rows, so they enter the fit.
m1 <- lm(formula = salary ~ sex, data = mba1.df)
summary(m1)
## 
## Call:
## lm(formula = salary ~ sex, data = mba1.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -56560 -54373  28440  45627 163440 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    54374       4519  12.031   <2e-16 ***
## sex2            2187       8544   0.256    0.798    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 53280 on 191 degrees of freedom
## Multiple R-squared:  0.0003429,  Adjusted R-squared:  -0.004891 
## F-statistic: 0.06551 on 1 and 191 DF,  p-value: 0.7983
# Linear model of salary on first-language code, fitted to mba1.df.
# NOTE(review): mba1.df still contains the salary 0 rows, so they enter the fit.
m2 <- lm(formula = salary ~ frstlang, data = mba1.df)
summary(m2)
## 
## Call:
## lm(formula = salary ~ frstlang, data = mba1.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -56287 -54876  30124  45124 163713 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    54876       3994  13.739   <2e-16 ***
## frstlang2       1411      14327   0.098    0.922    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 53290 on 191 degrees of freedom
## Multiple R-squared:  5.078e-05,  Adjusted R-squared:  -0.005185 
## F-statistic: 0.009699 on 1 and 191 DF,  p-value: 0.9217

TASK 2c: COMPARE THOSE WHO GOT A JOB WITH THOSE WHO DID NOT GET A JOB?

table

# Cross-tabulate "salary is non-zero" against "salary is positive" in mba1.df.
# Salary is numeric, so it is compared with the number 0 rather than the
# string "0" (a string operand makes R compare both sides as text).
# NOTE(review): both margins encode the same condition for these data, so the
# table is diagonal by construction; it only counts the two groups.
table(mba1.df$salary != 0, mba1.df$salary > 0)
##        
##         FALSE TRUE
##   FALSE    90    0
##   TRUE      0  103

Chi-square test.

# Same cross-tabulation as above, stored and passed to a chi-squared test.
# Salary is numeric, so it is compared with the number 0 rather than the
# string "0" (a string operand makes R compare both sides as text).
# NOTE(review): both margins encode the same condition for these data, so the
# table is diagonal by construction and the test result says nothing about how
# the two groups differ -- confirm which second variable was intended.
task.c <- table(mba1.df$salary != 0, mba1.df$salary > 0)
chisq.test(task.c)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  task.c
## X-squared = 189, df = 1, p-value < 2.2e-16