# Load the MBA starting-salaries data from a CSV in the working directory.
mba.df <- read.csv("MBA Starting Salaries Data.csv")

# The spreadsheet viewer is only useful in an interactive session, so skip it
# when the script is run non-interactively (e.g. via Rscript or knitting).
if (interactive()) {
  View(mba.df)
}

# mba1.df: drop rows whose satis is coded 998 or whose salary is coded 999.
# NOTE(review): this filters on satis != 998 whereas mba2.df below filters on
# salary != 998 -- confirm the two are meant to differ.
# NOTE(review): 998/999 look like sentinel codes rather than real values --
# confirm their meaning against the data dictionary.
mba1.df <- mba.df[which(mba.df$satis != 998 & mba.df$salary != 999), ]

# mba2.df: drop rows whose salary is coded 998 or 999, and rows with salary 0.
mba2.df <- mba.df[
  which(mba.df$salary != 998 & mba.df$salary != 999 & mba.df$salary != 0),
]

# Per-variable summary, restricted to summary columns 1-5 and 9-12.
summary(mba.df)[, c(1:5, 9:12)]
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc quarter work_yrs frstlang
## Min. :16.00 Min. :1.000 Min. : 0.000 Min. :1.000
## 1st Qu.:71.00 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000
## Median :81.00 Median :2.000 Median : 3.000 Median :1.000
## Mean :78.32 Mean :2.478 Mean : 3.872 Mean :1.117
## 3rd Qu.:91.00 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000
## Max. :99.00 Max. :4.000 Max. :22.000 Max. :2.000
## salary
## Min. : 0
## 1st Qu.: 0
## Median : 999
## Mean : 39026
## 3rd Qu.: 97000
## Max. :220000
library(psych)
# Descriptive statistics for every variable via psych's describe(), keeping
# only result columns 1-5 and 9-12 of the returned table.
describe(mba.df)[, c(1:5, 9:12)]
## vars n mean sd median max range skew kurtosis
## age 1 274 27.36 3.71 27 48 26 2.16 6.45
## sex 2 274 1.25 0.43 1 2 1 1.16 -0.66
## gmat_tot 3 274 619.45 57.54 620 790 340 -0.01 0.06
## gmat_qpc 4 274 80.64 14.87 83 99 71 -0.92 0.30
## gmat_vpc 5 274 78.32 16.86 81 99 83 -1.04 0.74
## gmat_tpc 6 274 84.20 14.02 87 99 99 -2.28 9.02
## s_avg 7 274 3.03 0.38 3 4 2 -0.06 -0.38
## f_avg 8 274 3.06 0.53 3 4 4 -2.08 10.85
## quarter 9 274 2.48 1.11 2 4 3 0.02 -1.35
## work_yrs 10 274 3.87 3.23 3 22 22 2.78 9.80
## frstlang 11 274 1.12 0.32 1 2 1 2.37 3.65
## salary 12 274 39025.69 50951.56 999 220000 220000 0.70 -1.05
## satis 13 274 172.18 371.61 6 998 997 1.77 1.13
# Histogram of the age column over the full data set.
hist(
  mba.df$age,
  col  = "grey",
  main = "Age distribution",
  xlab = "age",
  ylab = "Frequencies"
)
# Recode sex from its numeric codes (1, 2) into a labelled factor, then draw
# a bar chart of the number of rows in each level.
mba.df$sex <- factor(
  mba.df$sex,
  levels = c(1, 2),
  labels = c("Male", "Female")
)
plot(mba.df$sex, col = "green", main = "Gender distribution")
# Histogram of total GMAT score, requesting roughly 30 bins.
hist(
  mba.df$gmat_tot,
  breaks = 30,
  col    = "grey",
  main   = "Gmat score distribution",
  xlab   = "scores",
  ylab   = "Frequencies"
)
# Recode first language from its numeric codes (1, 2) into a labelled factor,
# then draw a bar chart of the number of rows in each level.
mba.df$frstlang <- factor(
  mba.df$frstlang,
  levels = c(1, 2),
  labels = c("English", "other")
)
plot(mba.df$frstlang, col = "grey", main = "language distribution")
# Histogram of years of work experience, requesting roughly 30 bins.
hist(
  mba.df$work_yrs,
  breaks = 30,
  col    = "grey",
  main   = "work experience distribution",
  xlab   = "experience years",
  ylab   = "Frequencies"
)
# Keep only rows whose satisfaction value is at most 7, i.e. on the 1-7 rating
# scale named in the axis label below.
# The threshold is numeric on purpose: comparing against the string '7' makes
# R coerce satis to character and compare lexically, so a value such as 10
# would wrongly pass ("10" sorts before "7").
rating <- mba.df[which(mba.df$satis <= 7), ]

# Histogram of the retained satisfaction ratings.
hist(
  rating$satis,
  main = "Satisfaction distribution",
  xlab = "Rating (1=low,7=high)",
  ylab = "Frequencies",
  col  = "grey"
)
# Drop rows whose salary is coded 998 or 999; rows with salary 0 are kept.
# salary is numeric, so compare against numbers rather than the strings
# "998"/"999" (string comparison relies on implicit number-to-text coercion).
salary.df <- mba.df[which(mba.df$salary != 998 & mba.df$salary != 999), ]

# Histogram of the remaining salary values.
hist(
  salary.df$salary,
  main = "Salary distribution",
  xlab = "Salary",
  ylab = "Frequencies",
  col  = "grey"
)
library(lattice)
# Horizontal box plots of salary split by sex, using the mba2.df subset.
boxplot(
  salary ~ sex,
  data       = mba2.df,
  horizontal = TRUE,
  xlab       = "Salary",
  ylab       = "1 = Male; 2 = Female"
)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter plot of salary against age for the mba2.df subset.
scatterplot(
  x    = mba2.df$age,
  y    = mba2.df$salary,
  main = "salary vs age",
  xlab = "age",
  ylab = "salary"
)
# Scatter plot of salary against years of work experience.
# The x-axis label restricts the plot to "those who got jobs", so use mba2.df
# (which excludes salary == 0) rather than mba1.df, which still contains the
# zero-salary rows.
scatterplot(
  mba2.df$work_yrs,
  mba2.df$salary,
  xlab = "Work experience (years) of those who got jobs",
  ylab = "Salary",
  main = "Salary vs Work Experience"
)
# Box plots of salary grouped by first language for the mba2.df subset.
# The formula interface is required here: passing the two columns as separate
# positional arguments draws one box for the language codes and one box for
# all salaries, instead of one salary box per language group.
boxplot(
  salary ~ frstlang,
  data = mba2.df,
  xlab = "1 = English Speaking 2 = others",
  ylab = "Salary",
  main = "Salary & language"
)
library(corrgram)
# Correlogram of the mba2.df variables in their original column order:
# shaded cells below the diagonal, pie glyphs above it, and each variable's
# min/max on the diagonal.
corrgram(
  mba2.df,
  order       = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel  = panel.minmax,
  text.panel  = panel.txt,
  main        = "Corrgram of mba starting salaries"
)
# Contingency table: salary values (rows) by sex code (columns) in mba1.df.
table(mba1.df[["salary"]], mba1.df[["sex"]])
##
## 1 2
## 0 67 23
## 64000 0 1
## 77000 1 0
## 78256 0 1
## 82000 0 1
## 85000 1 3
## 86000 0 2
## 88000 0 1
## 88500 1 0
## 90000 3 0
## 92000 2 1
## 93000 2 1
## 95000 4 3
## 96000 3 1
## 96500 1 0
## 97000 2 0
## 98000 6 4
## 99000 0 1
## 100000 4 5
## 100400 1 0
## 101000 0 2
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 2 0
## 105000 11 0
## 106000 2 1
## 107000 1 0
## 107300 1 0
## 107500 1 0
## 108000 2 0
## 110000 0 1
## 112000 3 0
## 115000 5 0
## 118000 1 0
## 120000 3 1
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Contingency table: salary values (rows) by age (columns) in mba1.df.
table(mba1.df[["salary"]], mba1.df[["age"]])
##
## 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 39 40 42 43 48
## 0 1 3 13 9 10 14 6 11 2 2 5 0 3 3 2 1 1 0 1 2 1
## 64000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 86000 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 90000 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 92000 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 95000 0 0 1 5 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 96000 0 0 1 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 98000 0 1 3 2 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 99000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 100000 0 1 4 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 100400 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 105000 0 1 1 2 3 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0
## 106000 0 0 0 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0
## 107000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 112000 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0
## 115000 0 0 1 1 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 120000 0 0 0 0 0 1 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 162000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
# Contingency table: salary values (rows) by total GMAT score (columns) in
# mba1.df.
table(mba1.df[["salary"]], mba1.df[["gmat_tot"]])
##
## 450 480 500 510 520 530 540 550 560 570 580 590 600 610 620 630
## 0 1 1 0 2 0 3 3 4 8 7 4 3 3 9 4 5
## 64000 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 90000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1
## 92000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 93000 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0
## 95000 0 0 0 0 0 1 0 0 2 0 0 0 0 2 0 0
## 96000 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0
## 98000 0 0 0 0 0 0 0 0 1 3 1 1 0 1 0 0
## 99000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 100000 0 0 0 0 0 0 0 0 2 0 1 0 1 1 0 1
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 101000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 104000 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0
## 105000 0 0 0 0 0 0 0 2 0 2 3 0 1 0 1 0
## 106000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 107000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 108000 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 112000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 115000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1
## 118000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 120000 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0
## 126710 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
##
## 640 650 660 670 680 700 710 720 730 740 750 760
## 0 6 5 3 4 3 2 4 2 1 1 1 1
## 64000 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 1 0 0 0 0 0 0 0 0 0
## 78256 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 1 0 0 0 0 0 0 0 0
## 85000 0 0 1 0 0 1 0 1 0 0 0 0
## 86000 0 0 0 0 1 0 0 0 0 0 0 0
## 88000 0 1 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 0
## 90000 0 1 0 0 0 0 0 0 0 0 0 0
## 92000 0 0 1 0 0 0 1 0 0 0 0 0
## 93000 0 0 0 0 0 0 0 0 0 0 0 0
## 95000 0 0 0 2 0 0 0 0 0 0 0 0
## 96000 0 1 0 0 0 0 0 0 0 0 0 0
## 96500 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0 0 0 0
## 98000 0 0 0 1 1 0 1 0 0 0 0 0
## 99000 0 0 0 0 0 0 0 0 0 0 0 0
## 100000 0 2 0 0 0 0 1 0 0 0 0 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 0 0 0
## 101100 0 0 1 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 1 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 0 0 0 0 0 0 0 0
## 105000 0 1 0 0 1 0 0 0 0 0 0 0
## 106000 0 0 0 0 2 0 0 0 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 0 0 0
## 107300 0 0 1 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 0 0 0 0 0 0 0 0 0
## 110000 1 0 0 0 0 0 0 0 0 0 0 0
## 112000 0 0 0 1 1 0 0 0 0 0 0 0
## 115000 0 0 0 0 0 0 1 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 0
## 120000 0 0 0 1 0 1 0 0 0 0 0 0
## 126710 0 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 1 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 1 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 0 0
# Contingency table: salary values (rows) by first-language code (columns) in
# mba1.df.
table(mba1.df[["salary"]], mba1.df[["frstlang"]])
##
## 1 2
## 0 82 8
## 64000 1 0
## 77000 1 0
## 78256 1 0
## 82000 1 0
## 85000 4 0
## 86000 2 0
## 88000 1 0
## 88500 1 0
## 90000 3 0
## 92000 3 0
## 93000 3 0
## 95000 7 0
## 96000 4 0
## 96500 1 0
## 97000 2 0
## 98000 8 2
## 99000 0 1
## 100000 9 0
## 100400 1 0
## 101000 2 0
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 1 1
## 105000 11 0
## 106000 3 0
## 107000 1 0
## 107300 0 1
## 107500 1 0
## 108000 2 0
## 110000 1 0
## 112000 3 0
## 115000 5 0
## 118000 0 1
## 120000 4 0
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Contingency table: salary values (rows) by years of work experience
# (columns) in mba1.df.
table(mba1.df[["salary"]], mba1.df[["work_yrs"]])
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 15 16 18 22
## 0 1 12 22 14 9 12 2 5 2 1 1 2 2 1 0 1 1 2
## 64000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 1 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 86000 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 90000 0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 92000 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0
## 95000 1 1 2 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 96000 0 1 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 98000 0 0 7 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0
## 99000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 100000 0 0 6 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0
## 100400 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 4 4 0 1 1 0 0 0 0 0 0 0 0 1 0 0
## 106000 0 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0
## 107000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 107300 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 112000 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0
## 115000 0 2 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## 120000 0 0 0 1 0 2 0 0 1 0 0 0 0 0 0 0 0 0
## 126710 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 162000 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
# Cross-tabulate sex against salary in mba1.df, then run Pearson's chi-squared
# test of independence on the resulting table.
task.a <- xtabs(formula = ~ sex + salary, data = mba1.df)
chisq.test(x = task.a)
## Warning in chisq.test(task.a): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: task.a
## X-squared = 55.494, df = 42, p-value = 0.07929
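# As the p-value (0.079) is greater than 0.05, we fail to reject the null hypothesis of independence: this test gives no statistically significant evidence of a difference between the salaries of males and females.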
# Cross-tabulate first language against salary in mba1.df, then run Pearson's
# chi-squared test of independence on the resulting table.
task.b <- xtabs(formula = ~ frstlang + salary, data = mba1.df)
chisq.test(x = task.b)
## Warning in chisq.test(task.b): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: task.b
## X-squared = 62.016, df = 42, p-value = 0.02384
# Convert the frstlang and sex columns of mba1.df to factors in one step, then
# print the structure of the data frame to confirm the new column types.
mba1.df[c("frstlang", "sex")] <- lapply(
  mba1.df[c("frstlang", "sex")],
  as.factor
)
str(mba1.df)
## 'data.frame': 193 obs. of 13 variables:
## $ age : int 23 24 24 24 24 25 25 27 27 28 ...
## $ sex : Factor w/ 2 levels "1","2": 2 1 1 1 1 1 2 1 1 2 ...
## $ gmat_tot: int 620 610 670 570 640 610 650 740 750 540 ...
## $ gmat_qpc: int 77 90 99 56 82 89 88 99 99 75 ...
## $ gmat_vpc: int 87 71 78 81 89 74 89 96 98 50 ...
## $ gmat_tpc: int 87 87 95 75 91 87 92 99 99 65 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.9 3.4 3.3 3.5 3.4 3.6 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.5 3.75 3.5 3.5 4 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 3 1 5 ...
## $ frstlang: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 0 0 0 0 0 0 ...
## $ satis : int 7 6 6 7 6 5 6 6 5 5 ...
# Linear model of salary on sex, fitted to mba1.df; print the coefficient
# table and fit statistics.
m1 <- lm(formula = salary ~ sex, data = mba1.df)
summary(m1)
##
## Call:
## lm(formula = salary ~ sex, data = mba1.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56560 -54373 28440 45627 163440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54374 4519 12.031 <2e-16 ***
## sex2 2187 8544 0.256 0.798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 53280 on 191 degrees of freedom
## Multiple R-squared: 0.0003429, Adjusted R-squared: -0.004891
## F-statistic: 0.06551 on 1 and 191 DF, p-value: 0.7983
# Linear model of salary on first language, fitted to mba1.df; print the
# coefficient table and fit statistics.
m2 <- lm(formula = salary ~ frstlang, data = mba1.df)
summary(m2)
##
## Call:
## lm(formula = salary ~ frstlang, data = mba1.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56287 -54876 30124 45124 163713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54876 3994 13.739 <2e-16 ***
## frstlang2 1411 14327 0.098 0.922
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 53290 on 191 degrees of freedom
## Multiple R-squared: 5.078e-05, Adjusted R-squared: -0.005185
## F-statistic: 0.009699 on 1 and 191 DF, p-value: 0.9217
# Cross-tabulate "salary is non-zero" against "salary is positive" in mba1.df.
# salary is numeric, so compare against the number 0 rather than the string
# "0"; a string operand makes R coerce salary to text and compare lexically.
# NOTE(review): for non-negative salaries both conditions are the same, so
# this table crosses a variable with itself and the chi-squared test below is
# trivially significant -- confirm which comparison was actually intended.
table(mba1.df$salary != 0, mba1.df$salary > 0)
##
## FALSE TRUE
## FALSE 90 0
## TRUE 0 103
task.c <- table(mba1.df$salary != 0, mba1.df$salary > 0)
chisq.test(task.c)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: task.c
## X-squared = 189, df = 1, p-value < 2.2e-16