# Load the package that provides describe() before any analysis code runs.
library(psych)

# Read the MBA data set from the current working directory. The file name is
# a fixed string, so no paste() wrapper is needed.
mbastart <- read.csv("mbastart.csv")

# View() opens a data-viewer window, so only call it when a person is driving
# the session; scripted or knitted runs skip it.
if (interactive()) {
  View(mbastart)
}

# Per-column descriptive statistics (n, mean, sd, median, range, skew, ...).
describe(mbastart)
## vars n mean sd median trimmed mad min max
## age 1 274 27.36 3.71 27 26.76 2.97 22 48
## sex 2 274 1.25 0.43 1 1.19 0.00 1 2
## gmat_tot 3 274 619.45 57.54 620 618.86 59.30 450 790
## gmat_qpc 4 274 80.64 14.87 83 82.31 14.83 28 99
## gmat_vpc 5 274 78.32 16.86 81 80.33 14.83 16 99
## gmat_tpc 6 274 84.20 14.02 87 86.12 11.86 0 99
## s_avg 7 274 3.03 0.38 3 3.03 0.44 2 4
## f_avg 8 274 3.06 0.53 3 3.09 0.37 0 4
## quarter 9 274 2.48 1.11 2 2.47 1.48 1 4
## work_yrs 10 274 3.87 3.23 3 3.29 1.48 0 22
## frstlang 11 274 1.12 0.32 1 1.02 0.00 1 2
## salary 12 274 39025.69 50951.56 999 33607.86 1481.12 0 220000
## satis 13 274 172.18 371.61 6 91.50 1.48 1 998
## range skew kurtosis se
## age 26 2.16 6.45 0.22
## sex 1 1.16 -0.66 0.03
## gmat_tot 340 -0.01 0.06 3.48
## gmat_qpc 71 -0.92 0.30 0.90
## gmat_vpc 83 -1.04 0.74 1.02
## gmat_tpc 99 -2.28 9.02 0.85
## s_avg 2 -0.06 -0.38 0.02
## f_avg 4 -2.08 10.85 0.03
## quarter 3 0.02 -1.35 0.07
## work_yrs 22 2.78 9.80 0.20
## frstlang 1 2.37 3.65 0.02
## salary 220000 0.70 -1.05 3078.10
## satis 997 1.77 1.13 22.45
# Print the structure of the data frame: number of observations, column names,
# storage type of each column and its first few values.
str(mbastart)
## 'data.frame': 274 obs. of 13 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : int 2 1 1 1 2 1 1 2 1 1 ...
## $ gmat_tot: int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc: int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc: int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc: int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang: int 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
# Draw a boxplot (left) and a histogram (right) of one numeric variable.
#
# values    numeric vector to plot
# box_ylab  y-axis label of the boxplot
# box_col   fill colour of the boxplot
# hist_xlab x-axis label of the histogram
# hist_main title of the histogram
# hist_col  fill colour of the histogram
# breaks    passed to hist(); "Sturges" is hist()'s own default
#
# The 1x2 layout set here is deliberately left in place afterwards, exactly as
# a bare par(mfrow = c(1, 2)) call would, so later plots keep drawing into it.
plot_box_hist <- function(values, box_ylab, box_col,
                          hist_xlab, hist_main, hist_col,
                          breaks = "Sturges") {
  par(mfrow = c(1, 2))
  boxplot(values, col = box_col, ylab = box_ylab)
  hist(values, col = hist_col, xlab = hist_xlab, main = hist_main,
       breaks = breaks)
}

# Age
plot_box_hist(mbastart$age,
              box_ylab = "Age", box_col = "lightblue",
              hist_xlab = "Age in years", hist_main = "Age Distribution",
              hist_col = "peachpuff")

# Sex: recode 1/2 into a labelled factor, then show the counts as a bar plot.
mbastart$sex <- factor(mbastart$sex, levels = c(1, 2),
                       labels = c("Male", "Female"))
plot(mbastart$sex, col = "lightblue", main = "Gender distribution")

# GMAT total score
plot_box_hist(mbastart$gmat_tot,
              box_ylab = "GMAT total score", box_col = "lightblue",
              hist_xlab = "GMAT total score",
              hist_main = "Distribution of GMAT scores",
              hist_col = "lightgreen", breaks = 20)

# GMAT quantitative percentile
plot_box_hist(mbastart$gmat_qpc,
              box_ylab = "quantitative GMAT percentile",
              box_col = "lightblue",
              hist_xlab = "quantitative GMAT percentile",
              hist_main = "Distribution of quantitative GMAT percentile",
              hist_col = "peachpuff")

# GMAT verbal percentile
plot_box_hist(mbastart$gmat_vpc,
              box_ylab = "verbal GMAT percentile", box_col = "lightblue",
              hist_xlab = "verbal GMAT percentile",
              hist_main = "Distribution of verbal GMAT percentile",
              hist_col = "lightgreen")

# GMAT overall percentile
plot_box_hist(mbastart$gmat_tpc,
              box_ylab = "Total GMAT percentile", box_col = "yellow",
              hist_xlab = "Total GMAT percentile",
              hist_main = "Total GMAT percentile Distribution",
              hist_col = "lightblue")

# Spring MBA average
plot_box_hist(mbastart$s_avg,
              box_ylab = "spring MBA average", box_col = "yellow",
              hist_xlab = "spring MBA average",
              hist_main = "spring MBA average Distribution",
              hist_col = "lightblue")

# Fall MBA average
plot_box_hist(mbastart$f_avg,
              box_ylab = "fall MBA average", box_col = "peachpuff",
              hist_xlab = "fall MBA average",
              hist_main = "fall MBA average Distribution",
              hist_col = "lightblue")

# Quartile ranking
plot_box_hist(mbastart$quarter,
              box_ylab = "quartile ranking", box_col = "lightblue",
              hist_xlab = "quartile ranking",
              hist_main = "quartile ranking Distribution",
              hist_col = "lightgreen")

# Years of work experience
plot_box_hist(mbastart$work_yrs,
              box_ylab = "years of work experience", box_col = "lightblue",
              hist_xlab = "No. of years of work experience",
              hist_main = "Work Experience",
              hist_col = "yellow", breaks = 20)

# First language: recode 1/2 into a labelled factor and show the counts.
mbastart$frstlang <- factor(mbastart$frstlang, levels = c(1, 2),
                            labels = c("English", "Others"))
plot(mbastart$frstlang, col = "lightblue", main = "Language Distribution")
# Keep only rows whose satisfaction value lies on the 1-7 rating scale.
# The comparison is numeric: comparing against the string '7' would coerce the
# column to character and compare lexicographically (e.g. "10" <= "7" is TRUE).
# Values above 7 (the column contains 998) are presumably non-response codes.
newdata <- mbastart[which(mbastart$satis <= 7), ]
hist(newdata$satis, breaks = 5, col = "lightgreen",
     xlab = "Degree of Satisfaction (1=low,7=high)",
     main = "Satisfaction distribution")

# Drop rows whose salary is one of the codes 998 / 999 (not real salaries).
# Rows with NA salary are dropped too, matching the behaviour of which().
salary_codes <- c(998, 999)
has_salary_value <- !is.na(mbastart$salary) &
  !(mbastart$salary %in% salary_codes)
newdata1 <- mbastart[has_salary_value, ]
hist(newdata1$salary, breaks = 5, col = "pink",
     xlab = "starting salary", main = "Salary distribution")
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Salary against age; line type 2 is passed to the smoother.
scatterplot(
  salary ~ age,
  data = newdata1,
  xlab = "age",
  ylab = "salary",
  main = "Scatter plot of salary vs age",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Salary against sex, same smoother settings as above.
scatterplot(
  salary ~ sex,
  data = newdata1,
  xlab = "sex",
  ylab = "salary",
  main = "Scatter plot of salary vs sex",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Salary against first language, using scatterplot()'s default settings.
scatterplot(
  salary ~ frstlang,
  data = newdata1,
  xlab = "first language",
  ylab = "salary",
  main = "Scatter plot of salary vs first language"
)

# Salary against total GMAT score; line type 3 is passed to the smoother.
scatterplot(
  salary ~ gmat_tot,
  data = newdata1,
  xlab = "Gmat score",
  ylab = "salary",
  main = "Scatter plot of salary vs Gmat total",
  spread = FALSE,
  smoother.args = list(lty = 3)
)
library(corrgram)
# Correlogram of newdata1: shaded cells below the diagonal, pies above it,
# variable names on the diagonal.
corrgram(
  newdata1,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "MBA starting salary analysis Correlogram"
)

# Covariance matrix of the selected numeric columns. x and y hold the same
# columns, so cov(x, y) is the covariance of that set with itself.
numeric_cols <- c("age", "gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc",
                  "s_avg", "f_avg", "work_yrs", "salary")
x <- newdata1[, numeric_cols]
y <- x
cov(x, y)
## age gmat_tot gmat_qpc gmat_vpc gmat_tpc
## age 1.778562e+01 -29.954933 -14.089729 -0.4564443 -7.5127645
## gmat_tot -2.995493e+01 3196.950561 636.350928 685.4644322 672.4651878
## gmat_qpc -1.408973e+01 636.350928 229.384067 42.7985481 141.4933074
## gmat_vpc -4.564443e-01 685.464432 42.798548 259.2695920 149.8747571
## gmat_tpc -7.512764e+00 672.465188 141.493307 149.8747571 183.0113882
## s_avg 2.626913e-01 3.076706 0.109287 1.1636153 0.9688199
## f_avg -7.513817e-02 2.969557 1.025241 0.2769703 0.7718585
## work_yrs 1.355880e+01 -36.222204 -13.484078 -2.4562014 -8.2897776
## salary -2.918528e+04 -170.881369 22855.717832 2901.3078044 43822.5291991
## s_avg f_avg work_yrs salary
## age 0.2626913 -0.07513817 1.355880e+01 -2.918528e+04
## gmat_tot 3.0767055 2.96955689 -3.622220e+01 -1.708814e+02
## gmat_qpc 0.1092870 1.02524072 -1.348408e+01 2.285572e+04
## gmat_vpc 1.1636153 0.27697026 -2.456201e+00 2.901308e+03
## gmat_tpc 0.9688199 0.77185854 -8.289778e+00 4.382253e+04
## s_avg 0.1436561 0.10251263 2.224652e-01 1.940528e+03
## f_avg 0.1025126 0.26995964 -9.189254e-02 2.443157e+02
## work_yrs 0.2224652 -0.09189254 1.360379e+01 -1.044263e+04
## salary 1940.5276360 244.31568869 -1.044263e+04 2.825177e+09
# Students with an actual starting salary: drop the codes 998 / 999 and the
# value 0 (treated in this script as "no job"). The comparison is numeric
# rather than against the strings "998", "999" and "0", which forced a
# character coercion of the whole column. Rows with NA salary are dropped,
# matching the behaviour of which().
not_a_salary <- c(0, 998, 999)
has_job <- !is.na(mbastart$salary) & !(mbastart$salary %in% not_a_salary)
job <- mbastart[has_job, ]
head(job)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35 22 Female 660 90 92 94 3.5 3.75 1
## 36 27 Female 700 94 98 98 3.3 3.25 1
## 37 25 Female 680 87 96 96 3.5 2.67 1
## 38 25 Female 650 82 91 93 3.4 3.25 1
## 39 27 Male 710 96 96 98 3.3 3.50 1
## 40 28 Female 620 52 98 87 3.4 3.75 1
## work_yrs frstlang salary satis
## 35 1 English 85000 5
## 36 2 English 85000 6
## 37 2 English 86000 5
## 38 3 English 88000 7
## 39 2 English 92000 6
## 40 5 English 93000 5
# Counts of students in `job` for every combination of sex and salary.
mytable <- xtabs(~ sex + salary, data = job)
print(mytable)
## salary
## sex 64000 77000 78256 82000 85000 86000 88000 88500 90000 92000 93000
## Male 0 1 0 0 1 0 0 1 3 2 2
## Female 1 0 1 1 3 2 1 0 0 1 1
## salary
## sex 95000 96000 96500 97000 98000 99000 100000 100400 101000 101100
## Male 4 3 1 2 6 0 4 1 0 1
## Female 3 1 0 0 4 1 5 0 2 0
## salary
## sex 101600 102500 103000 104000 105000 106000 107000 107300 107500
## Male 1 1 1 2 11 2 1 1 1
## Female 0 0 0 0 0 1 0 0 0
## salary
## sex 108000 110000 112000 115000 118000 120000 126710 130000 145800
## Male 2 0 3 5 1 3 1 1 1
## Female 0 1 0 0 0 1 0 0 0
## salary
## sex 146000 162000 220000
## Male 1 1 0
## Female 0 0 1
From this table we see that most of the higher starting salaries went to men, although the single highest salary (220000) went to a woman.
# Counts of students in `job` for every combination of salary and years of
# work experience.
mytable1 <- xtabs(~ salary + work_yrs, data = job)
print(mytable1)
## work_yrs
## salary 0 1 2 3 4 5 6 7 8 10 15 16
## 64000 0 0 1 0 0 0 0 0 0 0 0 0
## 77000 0 0 1 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0
## 82000 0 1 0 0 0 0 0 0 0 0 0 0
## 85000 0 1 2 1 0 0 0 0 0 0 0 0
## 86000 0 0 1 1 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0
## 88500 0 0 0 1 0 0 0 0 0 0 0 0
## 90000 0 0 2 0 0 1 0 0 0 0 0 0
## 92000 0 0 3 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 0 1 1 0 0 1 0 0 0
## 95000 1 1 2 2 0 1 0 0 0 0 0 0
## 96000 0 1 2 0 1 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 1 1 0 0 0 0 0 0 0
## 98000 0 0 7 1 1 0 0 1 0 0 0 0
## 99000 0 0 0 0 0 1 0 0 0 0 0 0
## 100000 0 0 6 1 1 0 1 0 0 0 0 0
## 100400 0 0 0 1 0 0 0 0 0 0 0 0
## 101000 0 0 2 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 1 0 0 0
## 101600 0 0 0 1 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 1 0 0 0 0 0
## 103000 0 0 0 1 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 2 0 0 0 0 0 0 0
## 105000 0 0 4 4 0 1 1 0 0 0 0 1
## 106000 0 0 0 0 0 0 2 0 1 0 0 0
## 107000 0 0 1 0 0 0 0 0 0 0 0 0
## 107300 0 0 1 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 1 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 1 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0
## 112000 0 0 1 0 0 0 1 0 0 0 0 1
## 115000 0 2 0 1 2 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 1 0 0
## 120000 0 0 0 1 0 2 0 0 1 0 0 0
## 126710 0 0 0 1 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 1 0
## 162000 0 1 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 1 0
From the above table we see that most students who got a job had at least 2 years of work experience, although a few with 0 or 1 years of experience also received offers.
# Counts of students in `job` for every combination of salary and first
# language.
mytable2 <- xtabs(~ salary + frstlang, data = job)
print(mytable2)
## frstlang
## salary English Others
## 64000 1 0
## 77000 1 0
## 78256 1 0
## 82000 1 0
## 85000 4 0
## 86000 2 0
## 88000 1 0
## 88500 1 0
## 90000 3 0
## 92000 3 0
## 93000 3 0
## 95000 7 0
## 96000 4 0
## 96500 1 0
## 97000 2 0
## 98000 8 2
## 99000 0 1
## 100000 9 0
## 100400 1 0
## 101000 2 0
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 1 1
## 105000 11 0
## 106000 3 0
## 107000 1 0
## 107300 0 1
## 107500 1 0
## 108000 2 0
## 110000 1 0
## 112000 3 0
## 115000 5 0
## 118000 0 1
## 120000 4 0
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
It is seen that most of the students who got jobs (96 of 103) have English as their first language. The table only shows counts, so by itself it does not establish that they receive higher salaries than students whose first language is not English.
# Counts of students in `job` for every combination of salary and total GMAT
# score.
mytable3 <- xtabs(~ salary + gmat_tot, data = job)
print(mytable3)
## gmat_tot
## salary 500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
## 64000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 90000 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0
## 92000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 93000 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0
## 95000 0 0 1 0 0 2 0 0 0 0 2 0 0 0 0 0
## 96000 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0
## 96500 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0
## 98000 0 0 0 0 0 1 3 1 1 0 1 0 0 0 0 0
## 99000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 100000 0 0 0 0 0 2 0 1 0 1 1 0 1 0 2 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 104000 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 2 0 2 3 0 1 0 1 0 0 1 0
## 106000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 108000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 112000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 115000 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 120000 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_tot
## salary 670 680 700 710 720
## 64000 0 0 0 0 0
## 77000 0 0 0 0 0
## 78256 0 0 0 0 0
## 82000 1 0 0 0 0
## 85000 0 0 1 0 1
## 86000 0 1 0 0 0
## 88000 0 0 0 0 0
## 88500 0 0 0 0 0
## 90000 0 0 0 0 0
## 92000 0 0 0 1 0
## 93000 0 0 0 0 0
## 95000 2 0 0 0 0
## 96000 0 0 0 0 0
## 96500 0 0 0 0 0
## 97000 0 0 0 0 0
## 98000 1 1 0 1 0
## 99000 0 0 0 0 0
## 100000 0 0 0 1 0
## 100400 0 0 0 0 0
## 101000 0 0 0 0 0
## 101100 0 0 0 0 0
## 101600 0 0 0 0 0
## 102500 1 0 0 0 0
## 103000 0 0 0 0 0
## 104000 0 0 0 0 0
## 105000 0 1 0 0 0
## 106000 0 2 0 0 0
## 107000 0 0 0 0 0
## 107300 0 0 0 0 0
## 107500 0 0 0 0 0
## 108000 0 0 0 0 0
## 110000 0 0 0 0 0
## 112000 1 1 0 0 0
## 115000 0 0 0 1 0
## 118000 0 0 0 0 0
## 120000 1 0 1 0 0
## 126710 0 0 0 0 0
## 130000 0 0 0 0 0
## 145800 0 0 0 0 0
## 146000 0 0 0 0 0
## 162000 0 0 1 0 0
## 220000 0 0 0 0 0
# Pearson chi-squared test of independence on the salary x work_yrs table.
# NOTE(review): the table is very sparse (almost every cell is 0 or 1) and the
# recorded run warns that the chi-squared approximation may be incorrect, so
# the p-value should be read with caution.
chisq.test(mytable1)
## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable1
## X-squared = 535.23, df = 451, p-value = 0.003809
Since p < 0.01, there is evidence of a relationship between work experience and salary.
# Pearson chi-squared test of independence on the salary x frstlang table.
# NOTE(review): the table is very sparse (the "Others" column holds only a
# handful of students) and the recorded run warns that the chi-squared
# approximation may be incorrect, so the p-value should be read with caution.
chisq.test(mytable2)
## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable2
## X-squared = 69.847, df = 41, p-value = 0.003296
Since p < 0.01, we can say that there is evidence of a relationship between first language and salary.
# Pearson chi-squared test of independence on the salary x gmat_tot table.
# NOTE(review): the table is very sparse (almost every cell is 0 or 1) and the
# recorded run warns that the chi-squared approximation may be incorrect, so
# the p-value should be read with caution.
chisq.test(mytable3)
## Warning in chisq.test(mytable3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable3
## X-squared = 927.24, df = 820, p-value = 0.005279
Since p < 0.01, we see that there is evidence of a relationship between total GMAT score and starting salary.
Model 1:
# Model 1: regress starting salary on the four GMAT measures, using the rows
# of `job`. Variables are named bare in the formula so that they are resolved
# through `data = job`; writing `job$salary ~ job$gmat_tot + ...` bypasses
# `data`, so predict(model1, newdata = ...) could not substitute new values.
# The estimates are unchanged; only the coefficient labels lose the `job$`
# prefix compared with output recorded from the older form of the call.
model1 <- lm(salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc, data = job)
summary(model1)
##
## Call:
## lm(formula = job$salary ~ job$gmat_tot + job$gmat_qpc + job$gmat_vpc +
## job$gmat_tpc, data = job)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40370 -8250 -2164 5253 100097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109539.54 48054.24 2.279 0.0248 *
## job$gmat_tot 55.01 181.71 0.303 0.7627
## job$gmat_qpc 718.40 541.90 1.326 0.1880
## job$gmat_vpc 546.10 543.85 1.004 0.3178
## job$gmat_tpc -1663.16 801.57 -2.075 0.0406 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17670 on 98 degrees of freedom
## Multiple R-squared: 0.06089, Adjusted R-squared: 0.02256
## F-statistic: 1.589 on 4 and 98 DF, p-value: 0.1834
gmat_tpc is the only predictor that is significant at the 5% level in this model. The multiple R-squared value indicates that the model accounts for about 6% of the variance in salary, and the overall F-test (p = 0.1834) is not significant. The residual standard error (17670) can be thought of as the typical error in predicting salary using the various GMAT data available.
Model 2:
# Model 2: regress starting salary on satisfaction, years of work experience
# and first language, using the rows of `job`. Variables are named bare in the
# formula so that they are resolved through `data = job`; the `job$...` form
# bypasses `data`, so predict(model2, newdata = ...) could not substitute new
# values. The estimates are unchanged; only the coefficient labels lose the
# `job$` prefix compared with output recorded from the older form of the call.
model2 <- lm(salary ~ satis + work_yrs + frstlang, data = job)
summary(model2)
##
## Call:
## lm(formula = job$salary ~ job$satis + job$work_yrs + job$frstlang,
## data = job)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31764 -9640 -604 4816 76193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 104142.2 11899.4 8.752 5.73e-14 ***
## job$satis -1913.1 2000.0 -0.957 0.3411
## job$work_yrs 2506.8 528.6 4.742 7.11e-06 ***
## job$frstlangOthers 13541.5 6305.7 2.147 0.0342 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15740 on 99 degrees of freedom
## Multiple R-squared: 0.2466, Adjusted R-squared: 0.2237
## F-statistic: 10.8 on 3 and 99 DF, p-value: 3.354e-06
work_yrs and frstlang are significant variables in model 2. The multiple R-squared value indicates that the model accounts for 24.66% of the variance in salary. The residual standard error (15740) can be thought of as the typical error in predicting salary using work experience, satisfaction and first language.
Model 3:
# Model 3: regress starting salary on age and sex, using the rows of `job`.
# Variables are named bare in the formula so that they are resolved through
# `data = job`; the `job$...` form bypasses `data`, so
# predict(model3, newdata = ...) could not substitute new values.
# The estimates are unchanged; only the coefficient labels lose the `job$`
# prefix compared with output recorded from the older form of the call.
model3 <- lm(salary ~ age + sex, data = job)
summary(model3)
##
## Call:
## lm(formula = job$salary ~ job$age + job$sex, data = job)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29047 -9444 -1750 5428 84503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33116.2 12997.5 2.548 0.0124 *
## job$age 2653.1 475.1 5.584 2.03e-07 ***
## job$sexFemale -3743.6 3372.6 -1.110 0.2697
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15540 on 100 degrees of freedom
## Multiple R-squared: 0.2588, Adjusted R-squared: 0.244
## F-statistic: 17.46 on 2 and 100 DF, p-value: 3.144e-07
Age is a significant factor in model 3.
# Students whose recorded salary is 0 (treated in this script as "no job").
# salary == 0 already excludes the 998 / 999 codes, so the extra
# `!= "998"` / `!= "999"` string comparisons were redundant. which() keeps the
# original handling of NA salaries (such rows are dropped).
nojob <- mbastart[which(mbastart$salary == 0), ]
head(nojob)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 1 23 Female 620 77 87 87 3.4 3.00 1
## 2 24 Male 610 90 71 87 3.5 4.00 1
## 3 24 Male 670 99 78 95 3.3 3.25 1
## 4 24 Male 570 56 81 75 3.3 2.67 1
## 6 24 Male 640 82 89 91 3.9 3.75 1
## 7 25 Male 610 89 74 87 3.4 3.50 1
## work_yrs frstlang salary satis
## 1 2 English 0 7
## 2 2 English 0 6
## 3 2 English 0 6
## 4 1 English 0 7
## 6 2 English 0 6
## 7 2 English 0 5
# View() opens a data-viewer window, so only call it when a person is driving
# the session; scripted or knitted runs skip it.
if (interactive()) {
  View(nojob)
}

# Pearson chi-squared test of independence between years of work experience
# and satisfaction among the students in `nojob`.
chisq.test(nojob$work_yrs, nojob$satis)
## Warning in chisq.test(nojob$work_yrs, nojob$satis): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: nojob$work_yrs and nojob$satis
## X-squared = 44.974, df = 48, p-value = 0.5976
The p-value (0.5976) is larger than 0.05, so we fail to reject the null hypothesis. Among the students without a job, there is no evidence of an association between years of work experience and satisfaction with the MBA program.
# Compare the overall GMAT percentile of students who were placed with that of
# students who were not.
#
# The earlier call, chisq.test(aggregate(salary by gmat_tpc, mean)), handed
# chisq.test() a two-column data frame (percentile value, mean salary). That
# data frame was treated as a contingency table of counts, so the statistic
# said nothing about placement; the means also included the 998 / 999 salary
# codes.
#
# Following the conventions used elsewhere in this script, rows with salary
# 998 / 999 are excluded and salary 0 marks a student without a job.
# NOTE(review): the recorded output that follows this call predates this fix
# and must be regenerated before the conclusion is relied on.
disclosed <- mbastart[
  which(mbastart$salary != 998 & mbastart$salary != 999),
]
disclosed$placed <- factor(
  disclosed$salary > 0,
  levels = c(FALSE, TRUE),
  labels = c("Not placed", "Placed")
)

# Rank-based two-sample test: gmat_tpc is a bounded percentile, so no
# normality assumption is made about it.
wilcox.test(gmat_tpc ~ placed, data = disclosed)
## Warning in chisq.test(aggregate(mbastart$salary, by =
## list(Overall_GMAT_Percentile = mbastart$gmat_tpc), : Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: aggregate(mbastart$salary, by = list(Overall_GMAT_Percentile = mbastart$gmat_tpc), mean)
## X-squared = 173100, df = 41, p-value < 2.2e-16
The p-value is small enough (< 0.05), so we reject the null hypothesis. This suggests that the overall GMAT percentile differs between the students who were placed and those who were not.