library(readr)
# Read the MBA starting-salary CSV (path is relative to the working directory).
MBA_Starting_Salaries_Data_ <- read_csv(
  file = "MBA Starting Salaries Data .csv"
)
## Parsed with column specification:
## cols(
## age = col_integer(),
## sex = col_integer(),
## gmat_tot = col_integer(),
## gmat_qpc = col_integer(),
## gmat_vpc = col_integer(),
## gmat_tpc = col_integer(),
## s_avg = col_double(),
## f_avg = col_double(),
## quarter = col_integer(),
## work_yrs = col_integer(),
## frstlang = col_integer(),
## salary = col_integer(),
## satis = col_integer()
## )
# Open the spreadsheet-style viewer only when a person is at the console:
# View() needs an interactive data viewer, which is not available when the
# script runs in batch mode (e.g. via Rscript).
if (interactive()) {
  View(MBA_Starting_Salaries_Data_)
}
library(psych)
# Per-column descriptive statistics (n, mean, sd, median, skew, ...).
describe(MBA_Starting_Salaries_Data_)
## vars n mean sd median trimmed mad min max
## age 1 274 27.36 3.71 27 26.76 2.97 22 48
## sex 2 274 1.25 0.43 1 1.19 0.00 1 2
## gmat_tot 3 274 619.45 57.54 620 618.86 59.30 450 790
## gmat_qpc 4 274 80.64 14.87 83 82.31 14.83 28 99
## gmat_vpc 5 274 78.32 16.86 81 80.33 14.83 16 99
## gmat_tpc 6 274 84.20 14.02 87 86.12 11.86 0 99
## s_avg 7 274 3.03 0.38 3 3.03 0.44 2 4
## f_avg 8 274 3.06 0.53 3 3.09 0.37 0 4
## quarter 9 274 2.48 1.11 2 2.47 1.48 1 4
## work_yrs 10 274 3.87 3.23 3 3.29 1.48 0 22
## frstlang 11 274 1.12 0.32 1 1.02 0.00 1 2
## salary 12 274 39025.69 50951.56 999 33607.86 1481.12 0 220000
## satis 13 274 172.18 371.61 6 91.50 1.48 1 998
## range skew kurtosis se
## age 26 2.16 6.45 0.22
## sex 1 1.16 -0.66 0.03
## gmat_tot 340 -0.01 0.06 3.48
## gmat_qpc 71 -0.92 0.30 0.90
## gmat_vpc 83 -1.04 0.74 1.02
## gmat_tpc 99 -2.28 9.02 0.85
## s_avg 2 -0.06 -0.38 0.02
## f_avg 4 -2.08 10.85 0.03
## quarter 3 0.02 -1.35 0.07
## work_yrs 22 2.78 9.80 0.20
## frstlang 1 2.37 3.65 0.02
## salary 220000 0.70 -1.05 3078.10
## satis 997 1.77 1.13 22.45
# Histogram of total GMAT scores.
hist(
  MBA_Starting_Salaries_Data_$gmat_tot,
  main = "Gmat total of MBA Graduates",
  col = "green",
  breaks = 6
)
Most of the MBA graduates have a GMAT total between 550 and 650.
# sex is a two-level integer code (values 1 and 2), not a continuous
# measurement, so plot the count of each level as a bar chart. A histogram
# bins the codes on a numeric axis and recycles the two colours over
# whatever bins it happens to choose.
barplot(
  table(MBA_Starting_Salaries_Data_$sex),
  col = c("green", "red"),
  main = "Sex of MBA graduates",
  xlab = "sex (code)",
  ylab = "Number of graduates"
)
Most of the MBA graduates in the sample were male
# frstlang is a two-level integer code (values 1 and 2), not a continuous
# measurement, so plot the count of each level as a bar chart. A histogram
# bins the codes on a numeric axis and recycles the two colours over
# whatever bins it happens to choose.
barplot(
  table(MBA_Starting_Salaries_Data_$frstlang),
  col = c("pink", "blue"),
  main = "First language of MBA graduates",
  xlab = "frstlang (code)",
  ylab = "Number of graduates"
)
Most of the MBA graduates have English as their first language.
# Histogram of years of work experience.
hist(MBA_Starting_Salaries_Data_$work_yrs, col = "orange")
# Horizontal box plot of salary.
boxplot(
  MBA_Starting_Salaries_Data_$salary,
  horizontal = TRUE,
  col = "purple",
  xlab = "Salary",
  main = "Salary "
)
# Frequency of each value of the satisfaction rating.
mytable <- table(satis = MBA_Starting_Salaries_Data_$satis)
mytable
## satis
## 1 2 3 4 5 6 7 998
## 1 1 5 17 74 97 33 46
Here, 33 graduates (satis = 7) report very high satisfaction with their MBA program.
# Side-by-side horizontal box plots: GMAT quantitative vs. verbal percentile.
boxplot(
  MBA_Starting_Salaries_Data_$gmat_qpc,
  MBA_Starting_Salaries_Data_$gmat_vpc,
  col = c("red", "pink"),
  horizontal = TRUE
)
# Side-by-side horizontal box plots: s_avg vs. f_avg.
boxplot(
  MBA_Starting_Salaries_Data_$s_avg,
  MBA_Starting_Salaries_Data_$f_avg,
  horizontal = TRUE,
  col = c("green", "yellow")
)
# Salary vs. sex ----
# Mean salary within each sex code.
aggregate(salary ~ sex, data = MBA_Starting_Salaries_Data_, FUN = mean)
## sex salary
## 1 1 37013.62
## 2 2 45121.07
The mean salary of female graduates is higher than that of male graduates.
# Horizontal box plot of salary for each sex code.
boxplot(
  salary ~ sex,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs sex",
  col = c("blue", "orange"),
  horizontal = TRUE
)
#salary vs age
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter plot of salary against age.
scatterplot(
  salary ~ age,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs age",
  xlab = "age",
  ylab = "salary"
)
# Salary vs. GMAT score ----
library(car)
scatterplot(
  salary ~ gmat_tot,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs gmat score",
  xlab = "gmat total",
  ylab = "salary"
)
# Mean salary within each first-language code.
aggregate(salary ~ frstlang, data = MBA_Starting_Salaries_Data_, FUN = mean)
## frstlang salary
## 1 1 40627.12
## 2 2 26914.84
The mean salary of graduates who have English as their first language is higher than that of graduates whose first language is another language.
# Horizontal box plot of salary for each first-language code.
boxplot(
  salary ~ frstlang,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs first language",
  col = c("blue", "orange"),
  horizontal = TRUE
)
# Salary vs. work experience ----
library(car)
scatterplot(
  salary ~ work_yrs,
  data = MBA_Starting_Salaries_Data_,
  main = "salary vs work experience",
  xlab = "work experience",
  ylab = "salary"
)
library(corrgram)
# Correlogram of all columns: shaded cells below the diagonal, pies above,
# variable names on the diagonal, with variables reordered (order = TRUE).
corrgram(
  MBA_Starting_Salaries_Data_,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "corrogram"
)
# Keep only the rows with a positive salary.
# The threshold is the number 0: a quoted '0' makes R coerce salary to
# character and compare strings, which is not a numeric comparison and is
# locale dependent for anything that does not start with a digit (such as
# negative values). For non-negative salaries the selected rows are the same.
# which() is kept so that rows with a missing salary are dropped rather than
# returned as all-NA rows.
# NOTE(review): the summaries show salary values of 998 and 999 (and
# satis == 998) that look like sentinel codes rather than real amounts; they
# pass this filter -- confirm against the data dictionary and consider
# excluding them before testing or modelling.
newdata <- MBA_Starting_Salaries_Data_[
  which(MBA_Starting_Salaries_Data_$salary > 0),
]
describe(newdata)
## vars n mean sd median trimmed mad min max
## age 1 184 26.79 2.77 26.0 26.45 1.48 22.0 40
## sex 2 184 1.24 0.43 1.0 1.18 0.00 1.0 2
## gmat_tot 3 184 621.96 54.76 620.0 621.69 59.30 450.0 790
## gmat_qpc 4 184 81.49 13.67 83.0 82.99 14.83 39.0 99
## gmat_vpc 5 184 78.66 17.24 83.0 80.84 17.79 16.0 99
## gmat_tpc 6 184 85.13 12.94 87.5 86.86 11.12 0.0 99
## s_avg 7 184 3.02 0.38 3.0 3.02 0.44 2.2 4
## f_avg 8 184 3.06 0.51 3.0 3.08 0.37 0.0 4
## quarter 9 184 2.45 1.13 2.0 2.43 1.48 1.0 4
## work_yrs 10 184 3.52 2.49 3.0 3.11 1.48 0.0 16
## frstlang 11 184 1.13 0.34 1.0 1.04 0.00 1.0 2
## salary 12 184 58114.34 52511.21 88250.0 56402.47 41883.45 998.0 220000
## satis 13 184 253.65 430.93 6.0 193.55 1.48 1.0 998
## range skew kurtosis se
## age 18.0 1.91 5.94 0.20
## sex 1.0 1.18 -0.61 0.03
## gmat_tot 340.0 -0.07 0.30 4.04
## gmat_qpc 60.0 -0.86 0.16 1.01
## gmat_vpc 83.0 -1.09 0.82 1.27
## gmat_tpc 99.0 -2.36 10.16 0.95
## s_avg 1.8 0.12 -0.67 0.03
## f_avg 4.0 -2.21 12.15 0.04
## quarter 3.0 0.04 -1.40 0.08
## work_yrs 16.0 2.69 9.70 0.18
## frstlang 1.0 2.18 2.75 0.02
## salary 219002.0 0.02 -1.41 3871.18
## satis 997.0 1.15 -0.69 31.77
# Contingency table: each distinct salary value by sex code.
mytable <- xtabs(~ salary + sex, data = newdata)
mytable
## sex
## salary 1 2
## 998 37 9
## 999 30 5
## 64000 0 1
## 77000 1 0
## 78256 0 1
## 82000 0 1
## 85000 1 3
## 86000 0 2
## 88000 0 1
## 88500 1 0
## 90000 3 0
## 92000 2 1
## 93000 2 1
## 95000 4 3
## 96000 3 1
## 96500 1 0
## 97000 2 0
## 98000 6 4
## 99000 0 1
## 100000 4 5
## 100400 1 0
## 101000 0 2
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 2 0
## 105000 11 0
## 106000 2 1
## 107000 1 0
## 107300 1 0
## 107500 1 0
## 108000 2 0
## 110000 0 1
## 112000 3 0
## 115000 5 0
## 118000 1 0
## 120000 3 1
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Two-sample t-test of mean salary between the two sex codes.
t.test(salary ~ sex, data = newdata)
##
## Welch Two Sample t-test
##
## data: salary by sex
## t = -1.5302, df = 78.552, p-value = 0.13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -30667.267 4010.795
## sample estimates:
## mean in group 1 mean in group 2
## 54854.72 68182.96
# Pearson chi-squared test on the salary-by-sex contingency table.
chisq.test(x = mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable
## X-squared = 64.319, df = 43, p-value = 0.0192
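The chi-squared test (p = 0.0192) suggests an association between salary and sex, although the approximation warning means it should be read with caution. The Welch t-test does not show a significant difference in mean salary between male and female graduates (p = 0.13), even though the sample mean from the test is higher for female graduates (68182.96 vs. 54854.72).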
# Contingency table: each distinct salary value by first-language code.
mytable1 <- xtabs(~ salary + frstlang, data = newdata)
mytable1
## frstlang
## salary 1 2
## 998 38 8
## 999 26 9
## 64000 1 0
## 77000 1 0
## 78256 1 0
## 82000 1 0
## 85000 4 0
## 86000 2 0
## 88000 1 0
## 88500 1 0
## 90000 3 0
## 92000 3 0
## 93000 3 0
## 95000 7 0
## 96000 4 0
## 96500 1 0
## 97000 2 0
## 98000 8 2
## 99000 0 1
## 100000 9 0
## 100400 1 0
## 101000 2 0
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 1 1
## 105000 11 0
## 106000 3 0
## 107000 1 0
## 107300 0 1
## 107500 1 0
## 108000 2 0
## 110000 1 0
## 112000 3 0
## 115000 5 0
## 118000 0 1
## 120000 4 0
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Two-sample t-test of mean salary between the two first-language codes.
t.test(salary ~ frstlang, data = newdata)
##
## Welch Two Sample t-test
##
## data: salary by frstlang
## t = 1.984, df = 28.139, p-value = 0.05708
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -823.4068 51947.5402
## sample estimates:
## mean in group 1 mean in group 2
## 61448.53 35886.46
# Pearson chi-squared test on the salary-by-first-language contingency table.
chisq.test(x = mytable1)
## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable1
## X-squared = 48.273, df = 43, p-value = 0.2682
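We fail to reject the null hypothesis: there is no significant difference in salary by first language at the 5% level (chi-squared test p = 0.2682; t-test p = 0.05708).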
# Contingency table: each distinct salary value by total GMAT score.
mytable2 <- xtabs(~ salary + gmat_tot, data = newdata)
mytable2
## gmat_tot
## salary 450 460 500 520 530 540 550 560 570 580 590 600 610 620 630 640
## 998 1 1 0 0 0 0 0 3 0 2 2 5 4 0 7 3
## 999 0 0 1 0 0 0 1 2 4 1 2 3 0 4 2 2
## 64000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 78256 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 90000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0
## 92000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 93000 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0
## 95000 0 0 0 0 1 0 0 2 0 0 0 0 2 0 0 0
## 96000 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0
## 98000 0 0 0 0 0 0 0 1 3 1 1 0 1 0 0 0
## 99000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 100000 0 0 0 0 0 0 0 2 0 1 0 1 1 0 1 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 101000 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 104000 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 0 0 2 0 2 3 0 1 0 1 0 0
## 106000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 107000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 108000 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 112000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 115000 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 120000 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0
## 126710 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_tot
## salary 650 660 670 680 690 700 710 720 730 740 790
## 998 2 3 5 2 2 0 1 1 0 2 0
## 999 2 3 1 1 2 0 1 0 1 1 1
## 64000 0 0 0 0 0 0 0 0 0 0 0
## 77000 0 1 0 0 0 0 0 0 0 0 0
## 78256 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 1 0 0 0 0 0 0 0 0
## 85000 0 1 0 0 0 1 0 1 0 0 0
## 86000 0 0 0 1 0 0 0 0 0 0 0
## 88000 1 0 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 0 0 0 0 0 0 0 0
## 90000 1 0 0 0 0 0 0 0 0 0 0
## 92000 0 1 0 0 0 0 1 0 0 0 0
## 93000 0 0 0 0 0 0 0 0 0 0 0
## 95000 0 0 2 0 0 0 0 0 0 0 0
## 96000 1 0 0 0 0 0 0 0 0 0 0
## 96500 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 0 0 0 0
## 98000 0 0 1 1 0 0 1 0 0 0 0
## 99000 0 0 0 0 0 0 0 0 0 0 0
## 100000 2 0 0 0 0 0 1 0 0 0 0
## 100400 0 0 0 0 0 0 0 0 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 0 0
## 101100 0 1 0 0 0 0 0 0 0 0 0
## 101600 0 0 0 0 0 0 0 0 0 0 0
## 102500 0 0 1 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 0 0 0 0 0 0 0
## 105000 1 0 0 1 0 0 0 0 0 0 0
## 106000 0 0 0 2 0 0 0 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 0 0
## 107300 0 1 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 0 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0
## 112000 0 0 1 1 0 0 0 0 0 0 0
## 115000 0 0 0 0 0 0 1 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0
## 120000 0 0 1 0 0 1 0 0 0 0 0
## 126710 0 0 0 0 0 0 0 0 0 0 0
## 130000 1 0 0 0 0 0 0 0 0 0 0
## 145800 0 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0
## 162000 0 0 0 0 0 1 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 0
# Pearson chi-squared test on the salary-by-GMAT-total contingency table.
chisq.test(x = mytable2)
## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable2
## X-squared = 1121.5, df = 1118, p-value = 0.465
# Contingency table: each distinct salary value by years of work experience.
mytable3 <- xtabs(~ salary + work_yrs, data = newdata)
mytable3
## work_yrs
## salary 0 1 2 3 4 5 6 7 8 9 10 15 16
## 998 0 2 16 13 12 0 1 1 1 0 0 0 0
## 999 1 2 6 8 11 2 2 2 0 1 0 0 0
## 64000 0 0 1 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 1 0 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 1 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 1 2 1 0 0 0 0 0 0 0 0 0
## 86000 0 0 1 1 0 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0 0
## 88500 0 0 0 1 0 0 0 0 0 0 0 0 0
## 90000 0 0 2 0 0 1 0 0 0 0 0 0 0
## 92000 0 0 3 0 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 0 1 1 0 0 1 0 0 0 0
## 95000 1 1 2 2 0 1 0 0 0 0 0 0 0
## 96000 0 1 2 0 1 0 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 1 1 0 0 0 0 0 0 0 0
## 98000 0 0 7 1 1 0 0 1 0 0 0 0 0
## 99000 0 0 0 0 0 1 0 0 0 0 0 0 0
## 100000 0 0 6 1 1 0 1 0 0 0 0 0 0
## 100400 0 0 0 1 0 0 0 0 0 0 0 0 0
## 101000 0 0 2 0 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 1 0 0 0 0
## 101600 0 0 0 1 0 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 1 0 0 0 0 0 0
## 103000 0 0 0 1 0 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 2 0 0 0 0 0 0 0 0
## 105000 0 0 4 4 0 1 1 0 0 0 0 0 1
## 106000 0 0 0 0 0 0 2 0 1 0 0 0 0
## 107000 0 0 1 0 0 0 0 0 0 0 0 0 0
## 107300 0 0 1 0 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 1 0 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 1 0 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0 0
## 112000 0 0 1 0 0 0 1 0 0 0 0 0 1
## 115000 0 2 0 1 2 0 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 1 0 0
## 120000 0 0 0 1 0 2 0 0 1 0 0 0 0
## 126710 0 0 0 1 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 1 0
## 162000 0 1 0 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 0 1 0
# Pearson chi-squared test on the salary-by-work-experience contingency table.
chisq.test(x = mytable3)
## Warning in chisq.test(mytable3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable3
## X-squared = 773.16, df = 516, p-value = 1.433e-12
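We reject the null hypothesis of independence (p = 1.433e-12): salary and years of work experience appear to be associated, although the chi-squared approximation warning means this result should be read with caution.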
# Linear model: salary on age, sex, the four GMAT measures, work experience
# and satisfaction, fitted to the full data set.
fit <- lm(
  salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    work_yrs + satis,
  data = MBA_Starting_Salaries_Data_
)
summary(fit)
##
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc +
## gmat_tpc + work_yrs + satis, data = MBA_Starting_Salaries_Data_)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64763 -45163 -3221 43605 186789
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 214629.575 65128.160 3.295 0.00112 **
## age -3948.105 1530.064 -2.580 0.01041 *
## sex 3730.050 6834.469 0.546 0.58568
## gmat_tot -299.779 209.269 -1.433 0.15318
## gmat_qpc 338.156 582.841 0.580 0.56228
## gmat_vpc 475.523 527.377 0.902 0.36805
## gmat_tpc 522.828 417.568 1.252 0.21164
## work_yrs 3391.302 1754.338 1.933 0.05429 .
## satis -47.715 7.856 -6.073 4.33e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47600 on 265 degrees of freedom
## Multiple R-squared: 0.1528, Adjusted R-squared: 0.1272
## F-statistic: 5.974 on 8 and 265 DF, p-value: 4.728e-07
# Linear model: salary on every other column, fitted to the full data set.
fit1 <- lm(
  salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + work_yrs + satis + quarter + frstlang,
  data = MBA_Starting_Salaries_Data_
)
summary(fit1)
##
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc +
## gmat_tpc + s_avg + f_avg + work_yrs + satis + quarter + frstlang,
## data = MBA_Starting_Salaries_Data_)
##
## Residuals:
## Min 1Q Median 3Q Max
## -77353 -42055 -4193 43432 204537
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 215476.47 74731.74 2.883 0.00426 **
## age -3841.82 1578.26 -2.434 0.01560 *
## sex 1810.69 6853.24 0.264 0.79183
## gmat_tot -278.09 209.44 -1.328 0.18540
## gmat_qpc 334.82 578.64 0.579 0.56333
## gmat_vpc 294.13 550.38 0.534 0.59351
## gmat_tpc 512.59 417.03 1.229 0.22012
## s_avg 12836.84 12919.75 0.994 0.32135
## f_avg -6371.67 6636.87 -0.960 0.33792
## work_yrs 2881.46 1784.27 1.615 0.10753
## satis -47.17 7.84 -6.016 6.01e-09 ***
## quarter -5443.82 4050.86 -1.344 0.18016
## frstlang -3058.63 10365.65 -0.295 0.76817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47130 on 261 degrees of freedom
## Multiple R-squared: 0.1818, Adjusted R-squared: 0.1442
## F-statistic: 4.834 on 12 and 261 DF, p-value: 3.555e-07
# Linear model: salary on age, sex, GMAT total, work experience, satisfaction
# and first language, fitted to the full data set.
fit2 <- lm(
  salary ~ age + sex + gmat_tot + work_yrs + satis + frstlang,
  data = MBA_Starting_Salaries_Data_
)
summary(fit2)
##
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + work_yrs + satis +
## frstlang, data = MBA_Starting_Salaries_Data_)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63826 -44997 -2531 43591 184589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 163652.153 50074.980 3.268 0.00122 **
## age -4039.139 1543.341 -2.617 0.00937 **
## sex 5030.505 6696.183 0.751 0.45316
## gmat_tot -29.586 51.836 -0.571 0.56865
## work_yrs 3428.567 1777.157 1.929 0.05476 .
## satis -46.713 7.901 -5.912 1.03e-08 ***
## frstlang -6545.915 9235.808 -0.709 0.47910
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47700 on 267 degrees of freedom
## Multiple R-squared: 0.1429, Adjusted R-squared: 0.1237
## F-statistic: 7.421 on 6 and 267 DF, p-value: 2.322e-07
# Linear model: salary on age, work experience and satisfaction only,
# fitted to the full data set.
fit3 <- lm(
  salary ~ age + work_yrs + satis,
  data = MBA_Starting_Salaries_Data_
)
summary(fit3)
##
## Call:
## lm(formula = salary ~ age + work_yrs + satis, data = MBA_Starting_Salaries_Data_)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63123 -46260 -1564 45467 185324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 149985.482 36096.495 4.155 4.37e-05 ***
## age -4287.696 1514.706 -2.831 0.00499 **
## work_yrs 3765.669 1734.895 2.171 0.03084 *
## satis -47.858 7.808 -6.129 3.12e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47550 on 270 degrees of freedom
## Multiple R-squared: 0.1387, Adjusted R-squared: 0.1291
## F-statistic: 14.49 on 3 and 270 DF, p-value: 8.813e-09
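The p-value of the overall F-test is far below 0.05, so the model as a whole is statistically significant. However, the adjusted R-squared of 0.1291 shows that it explains only about 13% of the variance in salary, so significance alone does not make it a good fit.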