# Load the MBA starting-salaries data and print per-column summary statistics.
library(psych)

mba.df <- read.csv("MBA Starting Salaries Data.csv")

# View() opens a data viewer; only do that in an interactive session so that
# batch runs (Rscript, knitr) do not try to open a viewer window.
if (interactive()) {
  View(mba.df)
}

# psych::describe() reports n, mean, sd, median, range, skew, kurtosis and
# standard error for every column.
describe(mba.df)
## vars n mean sd median trimmed mad min max
## age 1 274 27.36 3.71 27 26.76 2.97 22 48
## sex 2 274 1.25 0.43 1 1.19 0.00 1 2
## gmat_tot 3 274 619.45 57.54 620 618.86 59.30 450 790
## gmat_qpc 4 274 80.64 14.87 83 82.31 14.83 28 99
## gmat_vpc 5 274 78.32 16.86 81 80.33 14.83 16 99
## gmat_tpc 6 274 84.20 14.02 87 86.12 11.86 0 99
## s_avg 7 274 3.03 0.38 3 3.03 0.44 2 4
## f_avg 8 274 3.06 0.53 3 3.09 0.37 0 4
## quarter 9 274 2.48 1.11 2 2.47 1.48 1 4
## work_yrs 10 274 3.87 3.23 3 3.29 1.48 0 22
## frstlang 11 274 1.12 0.32 1 1.02 0.00 1 2
## salary 12 274 39025.69 50951.56 999 33607.86 1481.12 0 220000
## satis 13 274 172.18 371.61 6 91.50 1.48 1 998
## range skew kurtosis se
## age 26 2.16 6.45 0.22
## sex 1 1.16 -0.66 0.03
## gmat_tot 340 -0.01 0.06 3.48
## gmat_qpc 71 -0.92 0.30 0.90
## gmat_vpc 83 -1.04 0.74 1.02
## gmat_tpc 99 -2.28 9.02 0.85
## s_avg 2 -0.06 -0.38 0.02
## f_avg 4 -2.08 10.85 0.03
## quarter 3 0.02 -1.35 0.07
## work_yrs 22 2.78 9.80 0.20
## frstlang 1 2.37 3.65 0.02
## salary 220000 0.70 -1.05 3078.10
## satis 997 1.77 1.13 22.45
# ---- Univariate plots ----

# Age: boxplot next to a bar chart of the count at each age.
par(mfrow = c(1, 2))
age <- table(mba.df$age)
boxplot(mba.df$age, main = "Boxplot of Age", ylab = "age", col = "lightblue")
barplot(age, main = "AGE", xlab = "Ages", ylab = "Count")

# Recode sex (1/2) as a labelled factor; plot() on a factor draws a bar chart.
mba.df$sex <- factor(mba.df$sex, levels = c(1, 2), labels = c("Male", "Female"))
plot(mba.df$sex, main = "GENDER DISTRIBUTION", col = "lightgreen")

# GMAT total score: histogram next to a boxplot.
par(mfrow = c(1, 2))
hist(mba.df$gmat_tot, main = " GMAT TOTAL SCORE", col = "green",
     xlab = "gmat scores")
boxplot(mba.df$gmat_tot, main = "BOXPLOT GMAT TOTAL", col = "green")

# Years of work experience.
hist(mba.df$work_yrs, col = "pink",
     xlab = "No. of years of work experience",
     main = "Work Experience", breaks = 20)

# Recode first language (1/2) as a labelled factor.
mba.df$frstlang <- factor(mba.df$frstlang, levels = c(1, 2),
                          labels = c("English", "Others"))
plot(mba.df$frstlang)

# Satisfaction is rated 1-7; larger values in the column (e.g. 998) are
# presumably "no answer" codes -- confirm against the data dictionary.
# Compare against the number 7, not the string '7': a numeric-vs-character
# comparison is done on strings, so `<=` would be lexicographic.
# which() drops rows where the comparison is NA.
newlevel <- mba.df[which(mba.df$satis <= 7), ]
hist(newlevel$satis, breaks = 5, col = "green",
     xlab = "Degree of Satisfaction (1=low,7=high)",
     main = "Satisfaction distribution")

# Drop rows whose salary holds the 998/999 codes (numeric comparison).
# Rows with salary == 0 are still kept in job.df.
job.df <- mba.df[which(mba.df$salary != 998 & mba.df$salary != 999), ]
hist(job.df$salary, breaks = 5, col = "purple", xlab = "starting salary",
     main = "Salary distribution")

# Restore the single-panel layout.
par(mfrow = c(1, 1))
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter plot of salary against age for the job.df subset (car::scatterplot).
scatterplot(
  salary ~ age,
  data = job.df,
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter plot of salary vs age",
  xlab = "age",
  ylab = "salary"
)

# Single-panel layout; a large scipen biases printing towards fixed rather
# than scientific notation.
par(mfrow = c(1, 1))
options(scipen = 999)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Boxplot of salary by sex.
# job.df is a data frame created by this script, not a data set shipped with
# ggplot2, so the former data("job.df", package = "ggplot2") call only raised
# a "data set 'job.df' not found" warning; it has been removed.
gg <- ggplot(job.df, aes(x = sex, y = salary)) +
  geom_boxplot(size = 2) +
  labs(title = "SEX VS SALARY", y = "Salary", x = "SEX")
plot(gg)
options(scipen = 999)
library(ggplot2)

# Salary against total GMAT score with a fitted linear trend line.
# job.df is a data frame created by this script, not a ggplot2 data set, so
# the former data("job.df", package = "ggplot2") call (which only raised a
# "data set not found" warning) has been removed.
ggplot(job.df, aes(x = gmat_tot, y = salary)) +
  geom_point(col = "blue", size = 2) +
  geom_smooth(method = "lm") +
  labs(title = "SCATTERPLOT", subtitle = "SALARY VS GMAT TOTAL",
       x = "GMAT TOTAL", y = "SALARY")
library(car)

# Salary against years of work experience.
scatterplot(salary ~ work_yrs, data = job.df,
            main = "SCATTERPLOT OF SALARY VS WORKEXPERIENCE",
            xlab = "Work Experience in Years", ylab = "Salary")

par(mfrow = c(1, 1))
options(scipen = 999)
library(ggplot2)

# Boxplot of salary by first language.
# job.df is a data frame created by this script, not a ggplot2 data set, so
# the former data("job.df", package = "ggplot2") call (which only raised a
# "data set not found" warning) has been removed.
gg <- ggplot(job.df, aes(x = frstlang, y = salary)) +
  geom_boxplot(size = 2) +
  labs(title = "SALARY VS First Language", y = "Salary", x = "First Language")
plot(gg)
library(corrgram)

# Correlogram of job.df: shaded cells below the diagonal, pies above it.
corrgram(
  job.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "MBA starting salary analysis Correlogram"
)

# Covariance matrix of the numeric measures; x and y hold the same columns.
x <- job.df[, c("age", "gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc",
                "s_avg", "f_avg", "work_yrs", "salary")]
y <- x
cov(x, y)
## age gmat_tot gmat_qpc gmat_vpc
## age 17.78562176 -29.954933 -14.089729 -0.4564443
## gmat_tot -29.95493307 3196.950561 636.350928 685.4644322
## gmat_qpc -14.08972906 636.350928 229.384067 42.7985481
## gmat_vpc -0.45644430 685.464432 42.798548 259.2695920
## gmat_tpc -7.51276446 672.465188 141.493307 149.8747571
## s_avg 0.26269133 3.076706 0.109287 1.1636153
## f_avg -0.07513817 2.969557 1.025241 0.2769703
## work_yrs 13.55880289 -36.222204 -13.484078 -2.4562014
## salary -29185.28497409 -170.881369 22855.717832 2901.3078044
## gmat_tpc s_avg f_avg work_yrs
## age -7.5127645 0.2626913 -0.07513817 13.55880289
## gmat_tot 672.4651878 3.0767055 2.96955689 -36.22220423
## gmat_qpc 141.4933074 0.1092870 1.02524072 -13.48407815
## gmat_vpc 149.8747571 1.1636153 0.27697026 -2.45620142
## gmat_tpc 183.0113882 0.9688199 0.77185854 -8.28977763
## s_avg 0.9688199 0.1436561 0.10251263 0.22246519
## f_avg 0.7718585 0.1025126 0.26995964 -0.09189254
## work_yrs -8.2897776 0.2224652 -0.09189254 13.60378886
## salary 43822.5291991 1940.5276360 244.31568869 -10442.62667314
## salary
## age -29185.2850
## gmat_tot -170.8814
## gmat_qpc 22855.7178
## gmat_vpc 2901.3078
## gmat_tpc 43822.5292
## s_avg 1940.5276
## f_avg 244.3157
## work_yrs -10442.6267
## salary 2825177000.1131
# Rows with an actual salary figure: drop the 998/999 codes and salary == 0.
# salary is numeric, so compare against numbers rather than strings.
# which() drops rows where the comparison is NA.
job11.df <- mba.df[which(mba.df$salary != 998 &
                           mba.df$salary != 999 &
                           mba.df$salary != 0), ]
job11.df
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35 22 Female 660 90 92 94 3.50 3.75 1
## 36 27 Female 700 94 98 98 3.30 3.25 1
## 37 25 Female 680 87 96 96 3.50 2.67 1
## 38 25 Female 650 82 91 93 3.40 3.25 1
## 39 27 Male 710 96 96 98 3.30 3.50 1
## 40 28 Female 620 52 98 87 3.40 3.75 1
## 41 24 Male 670 84 96 95 3.30 3.25 1
## 42 25 Female 560 52 81 72 3.30 3.50 1
## 43 25 Female 530 50 62 61 3.60 3.67 1
## 44 25 Male 650 79 93 93 3.30 3.50 1
## 45 26 Female 590 56 89 81 3.30 3.25 1
## 46 23 Female 650 93 81 93 3.40 3.00 1
## 47 24 Male 560 81 50 71 3.40 3.67 1
## 48 27 Male 610 72 84 86 3.30 3.50 1
## 49 25 Male 650 95 84 93 3.30 3.00 1
## 50 25 Male 550 74 50 68 3.50 3.50 1
## 51 26 Male 570 68 74 75 3.80 3.50 1
## 52 26 Male 580 79 71 78 3.45 3.50 1
## 53 30 Male 600 60 91 83 3.30 3.25 1
## 54 31 Male 570 72 71 75 3.60 3.50 1
## 55 30 Male 620 60 96 87 3.50 3.00 1
## 56 30 Female 680 96 87 96 3.70 3.60 1
## 57 27 Male 630 93 75 91 3.30 3.25 1
## 58 25 Male 600 82 74 83 3.50 3.25 1
## 59 28 Female 640 89 81 91 3.60 3.50 1
## 60 39 Male 600 72 81 83 3.60 3.50 1
## 61 27 Male 570 95 33 75 3.70 4.00 1
## 62 27 Male 710 95 98 98 3.60 3.50 1
## 63 33 Male 620 72 89 87 3.50 3.50 1
## 64 27 Male 600 67 84 83 3.50 3.00 1
## 65 28 Male 700 95 95 98 3.80 4.00 1
## 66 30 Male 600 77 81 84 3.50 3.25 1
## 67 30 Female 670 87 95 95 3.30 3.25 1
## 68 40 Male 630 71 95 91 4.00 0.00 1
## 69 25 Male 700 98 93 98 3.60 3.75 1
## 115 26 Female 670 87 95 95 3.10 3.33 2
## 116 25 Female 620 89 74 87 3.10 3.50 2
## 117 31 Male 540 60 62 65 3.10 3.00 2
## 118 25 Male 670 95 89 95 3.20 3.50 2
## 119 25 Male 610 87 71 86 3.27 3.25 2
## 120 24 Male 560 52 81 72 3.20 3.25 2
## 121 24 Male 500 78 30 52 3.00 2.75 2
## 122 23 Male 590 72 81 81 3.20 3.25 2
## 123 24 Male 570 82 58 75 3.20 3.25 2
## 124 26 Female 570 93 37 75 3.00 2.75 2
## 125 28 Female 580 83 58 79 3.10 3.00 2
## 126 24 Female 580 72 71 78 3.00 3.25 2
## 127 31 Male 560 68 67 72 3.09 3.00 2
## 128 25 Female 620 89 74 87 3.10 3.50 2
## 129 27 Male 620 97 63 88 3.20 3.00 2
## 130 28 Male 560 75 58 72 3.20 3.25 2
## 131 26 Male 680 84 96 96 3.20 3.25 2
## 132 27 Male 620 81 87 89 3.00 3.00 2
## 133 34 Male 550 72 58 69 3.00 3.00 2
## 134 26 Male 600 84 67 83 3.09 3.50 2
## 135 29 Male 670 91 93 95 3.10 3.00 2
## 136 24 Male 620 84 81 87 3.00 3.25 2
## 137 27 Male 630 72 95 89 3.20 3.00 2
## 138 26 Male 650 89 87 93 3.20 3.25 2
## 139 24 Male 620 88 74 87 3.10 3.00 2
## 186 23 Female 520 43 67 58 2.90 2.75 3
## 187 27 Male 620 87 74 87 2.70 2.75 3
## 188 25 Male 580 78 67 80 2.90 3.25 3
## 189 25 Male 630 75 93 89 2.70 2.50 3
## 190 25 Male 610 89 74 87 2.70 2.75 3
## 191 29 Female 560 64 71 72 2.90 3.00 3
## 192 27 Male 620 79 87 88 2.90 2.75 3
## 193 28 Male 580 72 71 78 2.80 3.00 3
## 194 24 Female 670 83 98 96 2.90 3.25 3
## 195 25 Female 560 39 91 72 2.90 3.00 3
## 196 25 Female 580 72 71 78 2.80 3.25 3
## 197 27 Male 680 97 90 97 2.90 2.75 3
## 198 28 Male 610 89 67 86 2.70 3.00 3
## 199 29 Male 710 93 98 99 2.90 3.25 3
## 200 24 Male 710 99 92 99 2.90 3.00 3
## 201 25 Female 630 84 87 89 2.80 2.75 3
## 202 24 Female 600 89 67 85 2.80 3.00 3
## 203 29 Male 660 91 90 95 2.80 3.00 3
## 204 30 Male 670 83 97 96 2.80 2.75 3
## 205 24 Male 580 89 54 78 2.91 2.83 3
## 206 29 Male 680 79 99 96 2.90 3.00 3
## 207 32 Male 660 83 95 94 2.90 3.50 3
## 208 28 Male 570 56 84 75 2.90 3.00 3
## 209 24 Male 680 96 87 97 2.80 2.75 3
## 256 24 Female 560 55 78 71 3.50 3.25 4
## 257 23 Male 660 81 98 95 2.50 3.00 4
## 258 25 Female 720 96 98 99 3.50 3.60 4
## 259 26 Male 620 78 87 89 2.40 2.00 4
## 260 26 Female 630 85 81 90 2.90 3.25 4
## 261 27 Male 650 89 89 93 2.40 2.25 4
## 262 25 Male 660 99 71 95 3.40 3.25 4
## 263 25 Male 610 83 81 86 2.40 2.75 4
## 264 26 Male 600 87 62 83 2.50 2.50 4
## 265 24 Male 570 75 62 75 2.30 2.50 4
## 266 24 Female 600 77 78 84 2.60 3.00 4
## 267 26 Female 650 91 84 93 2.60 3.00 4
## 268 29 Male 630 72 95 89 2.60 2.50 4
## 269 26 Male 630 96 71 91 2.60 2.75 4
## 270 31 Male 530 75 45 62 2.40 2.75 4
## 271 23 Male 580 64 81 78 2.20 2.00 4
## 272 25 Male 540 79 45 65 2.60 2.50 4
## 273 26 Male 550 72 58 69 2.60 2.75 4
## 274 40 Female 500 60 45 51 2.50 2.75 4
## work_yrs frstlang salary satis
## 35 1 English 85000 5
## 36 2 English 85000 6
## 37 2 English 86000 5
## 38 3 English 88000 7
## 39 2 English 92000 6
## 40 5 English 93000 5
## 41 0 English 95000 4
## 42 1 English 95000 5
## 43 3 English 95000 3
## 44 1 English 96000 7
## 45 4 English 96000 5
## 46 2 English 100000 7
## 47 2 English 100000 6
## 48 6 English 100000 6
## 49 2 English 105000 7
## 50 3 English 105000 6
## 51 3 English 105000 6
## 52 2 English 105000 5
## 53 5 English 105000 6
## 54 6 English 105000 6
## 55 8 English 106000 7
## 56 6 English 106000 6
## 57 3 English 107500 5
## 58 3 English 108000 6
## 59 6 English 110000 5
## 60 16 English 112000 7
## 61 4 English 115000 5
## 62 1 English 115000 5
## 63 10 Others 118000 7
## 64 3 English 120000 5
## 65 5 English 120000 5
## 66 5 English 120000 6
## 67 8 English 120000 6
## 68 15 English 146000 6
## 69 1 English 162000 5
## 115 1 English 82000 7
## 116 2 English 92000 5
## 117 8 English 93000 6
## 118 2 English 95000 6
## 119 3 English 95000 6
## 120 2 English 96000 7
## 121 2 English 96500 6
## 122 2 English 98000 6
## 123 2 English 98000 6
## 124 3 Others 98000 5
## 125 5 Others 99000 6
## 126 2 English 100000 5
## 127 4 English 100000 6
## 128 2 English 101000 5
## 129 3 English 103000 6
## 130 4 English 104000 5
## 131 3 English 105000 6
## 132 3 English 105000 5
## 133 16 English 105000 5
## 134 2 English 107000 5
## 135 6 English 112000 6
## 136 1 English 115000 6
## 137 4 English 115000 6
## 138 4 English 130000 7
## 139 2 English 145800 6
## 186 1 English 78256 5
## 187 3 English 88500 6
## 188 2 English 90000 7
## 189 2 English 90000 5
## 190 4 English 93000 6
## 191 5 English 95000 7
## 192 4 English 97000 7
## 193 3 English 97000 6
## 194 2 English 98000 7
## 195 2 English 98000 7
## 196 2 English 98000 6
## 197 2 Others 98000 6
## 198 4 English 98000 7
## 199 7 English 98000 5
## 200 3 English 100000 6
## 201 2 English 100000 6
## 202 2 English 101000 6
## 203 8 English 101100 6
## 204 6 English 102500 5
## 205 2 English 105000 5
## 206 6 English 106000 6
## 207 2 Others 107300 7
## 208 4 English 108000 6
## 209 2 English 112000 6
## 256 2 English 64000 7
## 257 2 English 77000 6
## 258 3 English 85000 6
## 259 2 English 85000 6
## 260 3 English 86000 5
## 261 5 English 90000 5
## 262 2 English 92000 7
## 263 2 English 95000 7
## 264 2 English 96000 6
## 265 2 English 98000 6
## 266 2 English 100000 6
## 267 2 English 100000 7
## 268 3 English 100400 7
## 269 3 English 101600 6
## 270 4 Others 104000 6
## 271 2 English 105000 6
## 272 3 English 115000 5
## 273 3 English 126710 6
## 274 15 Others 220000 6
# Contingency table: salary values (rows) by sex (columns) in job11.df.
mytable <- xtabs(~ salary + sex, data = job11.df)
mytable
## sex
## salary Male Female
## 64000 0 1
## 77000 1 0
## 78256 0 1
## 82000 0 1
## 85000 1 3
## 86000 0 2
## 88000 0 1
## 88500 1 0
## 90000 3 0
## 92000 2 1
## 93000 2 1
## 95000 4 3
## 96000 3 1
## 96500 1 0
## 97000 2 0
## 98000 6 4
## 99000 0 1
## 100000 4 5
## 100400 1 0
## 101000 0 2
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 2 0
## 105000 11 0
## 106000 2 1
## 107000 1 0
## 107300 1 0
## 107500 1 0
## 108000 2 0
## 110000 0 1
## 112000 3 0
## 115000 5 0
## 118000 1 0
## 120000 3 1
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
From this table we see that most higher starting salaries have been awarded to men.
# Contingency table: salary values (rows) by years of work experience (columns).
mytable1 <- xtabs(~ salary + work_yrs, data = job11.df)
mytable1
## work_yrs
## salary 0 1 2 3 4 5 6 7 8 10 15 16
## 64000 0 0 1 0 0 0 0 0 0 0 0 0
## 77000 0 0 1 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0
## 82000 0 1 0 0 0 0 0 0 0 0 0 0
## 85000 0 1 2 1 0 0 0 0 0 0 0 0
## 86000 0 0 1 1 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0
## 88500 0 0 0 1 0 0 0 0 0 0 0 0
## 90000 0 0 2 0 0 1 0 0 0 0 0 0
## 92000 0 0 3 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 0 1 1 0 0 1 0 0 0
## 95000 1 1 2 2 0 1 0 0 0 0 0 0
## 96000 0 1 2 0 1 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 1 1 0 0 0 0 0 0 0
## 98000 0 0 7 1 1 0 0 1 0 0 0 0
## 99000 0 0 0 0 0 1 0 0 0 0 0 0
## 100000 0 0 6 1 1 0 1 0 0 0 0 0
## 100400 0 0 0 1 0 0 0 0 0 0 0 0
## 101000 0 0 2 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 1 0 0 0
## 101600 0 0 0 1 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 1 0 0 0 0 0
## 103000 0 0 0 1 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 2 0 0 0 0 0 0 0
## 105000 0 0 4 4 0 1 1 0 0 0 0 1
## 106000 0 0 0 0 0 0 2 0 1 0 0 0
## 107000 0 0 1 0 0 0 0 0 0 0 0 0
## 107300 0 0 1 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 1 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 1 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0
## 112000 0 0 1 0 0 0 1 0 0 0 0 1
## 115000 0 2 0 1 2 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 1 0 0
## 120000 0 0 0 1 0 2 0 0 1 0 0 0
## 126710 0 0 0 1 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 1 0
## 162000 0 1 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 1 0
From the above table we see that most placed students have about 2 or more years of work experience, although a few were placed with 0 or 1 year.
# Contingency table: salary values (rows) by first language (columns).
mytable2 <- xtabs(~ salary + frstlang, data = job11.df)
mytable2
## frstlang
## salary English Others
## 64000 1 0
## 77000 1 0
## 78256 1 0
## 82000 1 0
## 85000 4 0
## 86000 2 0
## 88000 1 0
## 88500 1 0
## 90000 3 0
## 92000 3 0
## 93000 3 0
## 95000 7 0
## 96000 4 0
## 96500 1 0
## 97000 2 0
## 98000 8 2
## 99000 0 1
## 100000 9 0
## 100400 1 0
## 101000 2 0
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 1 1
## 105000 11 0
## 106000 3 0
## 107000 1 0
## 107300 0 1
## 107500 1 0
## 108000 2 0
## 110000 1 0
## 112000 3 0
## 115000 5 0
## 118000 0 1
## 120000 4 0
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
It is seen that students with English as first language are mostly preferred and get higher salaries and jobs compared to those whose first language is not English.
# Contingency table: salary values (rows) by total GMAT score (columns).
mytable3 <- xtabs(~ salary + gmat_tot, data = job11.df)
mytable3
## gmat_tot
## salary 500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
## 64000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 90000 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0
## 92000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 93000 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0
## 95000 0 0 1 0 0 2 0 0 0 0 2 0 0 0 0 0
## 96000 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0
## 96500 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0
## 98000 0 0 0 0 0 1 3 1 1 0 1 0 0 0 0 0
## 99000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 100000 0 0 0 0 0 2 0 1 0 1 1 0 1 0 2 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 104000 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 2 0 2 3 0 1 0 1 0 0 1 0
## 106000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 108000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 112000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 115000 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 120000 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_tot
## salary 670 680 700 710 720
## 64000 0 0 0 0 0
## 77000 0 0 0 0 0
## 78256 0 0 0 0 0
## 82000 1 0 0 0 0
## 85000 0 0 1 0 1
## 86000 0 1 0 0 0
## 88000 0 0 0 0 0
## 88500 0 0 0 0 0
## 90000 0 0 0 0 0
## 92000 0 0 0 1 0
## 93000 0 0 0 0 0
## 95000 2 0 0 0 0
## 96000 0 0 0 0 0
## 96500 0 0 0 0 0
## 97000 0 0 0 0 0
## 98000 1 1 0 1 0
## 99000 0 0 0 0 0
## 100000 0 0 0 1 0
## 100400 0 0 0 0 0
## 101000 0 0 0 0 0
## 101100 0 0 0 0 0
## 101600 0 0 0 0 0
## 102500 1 0 0 0 0
## 103000 0 0 0 0 0
## 104000 0 0 0 0 0
## 105000 0 1 0 0 0
## 106000 0 2 0 0 0
## 107000 0 0 0 0 0
## 107300 0 0 0 0 0
## 107500 0 0 0 0 0
## 108000 0 0 0 0 0
## 110000 0 0 0 0 0
## 112000 1 1 0 0 0
## 115000 0 0 0 1 0
## 118000 0 0 0 0 0
## 120000 1 0 1 0 0
## 126710 0 0 0 0 0
## 130000 0 0 0 0 0
## 145800 0 0 0 0 0
## 146000 0 0 0 0 0
## 162000 0 0 1 0 0
## 220000 0 0 0 0 0
1.SALARY OF MALE AND FEMALE
NULL HYPOTHESIS: There is no difference between the salaries of males and females.
# Two-sample t-test (equal variances assumed) of log salary between the sexes.
log.transformed.salary <- log(job11.df$salary)
t.test(log.transformed.salary ~ job11.df$sex, var.equal = TRUE)
##
## Two Sample t-test
##
## data: log.transformed.salary by job11.df$sex
## t = 2.4552, df = 101, p-value = 0.01579
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01470674 0.13847594
## sample estimates:
## mean in group Male mean in group Female
## 11.55390 11.47731
p-value < 0.05, hence we reject the null hypothesis and conclude that there is a significant difference between the salaries of males and females.
# Pearson correlation test between salary and years of work experience.
# The former var.equal/paired arguments belong to t.test(), not cor.test();
# they were silently ignored, so dropping them does not change the result.
cor.test(job.df$salary, job.df$work_yrs)
##
## Pearson's product-moment correlation
##
## data: job.df$salary and job.df$work_yrs
## t = -0.73721, df = 191, p-value = 0.4619
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.19305455 0.08864017
## sample estimates:
## cor
## -0.05326685
p-value > 0.05 (p = 0.4619). Hence there is no evidence of a linear relationship between salary and work experience.
# Pearson correlation test between salary and total GMAT score.
# The former var.equal/paired arguments belong to t.test(), not cor.test();
# they were silently ignored, so dropping them does not change the result.
cor.test(job.df$salary, job.df$gmat_tot)
##
## Pearson's product-moment correlation
##
## data: job.df$salary and job.df$gmat_tot
## t = -0.00078582, df = 191, p-value = 0.9994
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1412959 0.1411844
## sample estimates:
## cor
## -0.00005685962
p-value > 0.05. Hence there is no evidence of a linear relationship between salary and total GMAT score.
# Chi-squared test of independence on the salary-by-first-language table.
# R warns that the chi-squared approximation may be incorrect for this table.
chisq.test(x = mytable2)
## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable2
## X-squared = 69.847, df = 41, p-value = 0.003296
Since p<0.01 we can say that there is a relationship between first language and salary
# Chi-squared test of independence on the salary-by-work-experience table.
# R warns that the chi-squared approximation may be incorrect for this table.
chisq.test(x = mytable1)
## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable1
## X-squared = 535.23, df = 451, p-value = 0.003809
Since p<0.01 there is a relationship between work experience and salary
# Chi-squared test of independence on the salary-by-GMAT-total table.
# R warns that the chi-squared approximation may be incorrect for this table.
chisq.test(x = mytable3)
## Warning in chisq.test(mytable3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable3
## X-squared = 927.24, df = 820, p-value = 0.005279
Since p<0.01 we see there exists a relationship between Total GMAT score and starting salary.
## Regression Model
# Model 1: salary regressed on the GMAT measures, using rows that have an
# actual salary figure (998/999 codes and salary == 0 removed).
# salary is numeric, so compare against numbers rather than strings.
job11.df <- mba.df[which(mba.df$salary != 998 &
                           mba.df$salary != 999 &
                           mba.df$salary != 0), ]

# Refer to columns by bare name so that `data =` actually supplies them (and
# predict() with newdata works). Estimates are unchanged; the only visible
# difference is that coefficient labels read e.g. `gmat_tot` instead of
# `job11.df$gmat_tot`.
fit <- lm(salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc, data = job11.df)
summary(fit)
##
## Call:
## lm(formula = job11.df$salary ~ job11.df$gmat_tot + job11.df$gmat_qpc +
## job11.df$gmat_vpc + job11.df$gmat_tpc, data = job11.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40370 -8250 -2164 5253 100097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109539.54 48054.24 2.279 0.0248 *
## job11.df$gmat_tot 55.01 181.71 0.303 0.7627
## job11.df$gmat_qpc 718.40 541.90 1.326 0.1880
## job11.df$gmat_vpc 546.10 543.85 1.004 0.3178
## job11.df$gmat_tpc -1663.16 801.57 -2.075 0.0406 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17670 on 98 degrees of freedom
## Multiple R-squared: 0.06089, Adjusted R-squared: 0.02256
## F-statistic: 1.589 on 4 and 98 DF, p-value: 0.1834
gmat_tpc is a significant variable in model 1. The multiple R-squared value indicates that the model accounts for about 6% of the variance in salary. The residual standard error (17670) can be thought of as the average error in predicting salary using the various GMAT data available.
# Model 2: salary regressed on satisfaction, work experience and first language.
# Columns are referenced by bare name so that `data =` actually supplies them;
# estimates are unchanged, only the coefficient labels lose the `job.df$`
# prefix.
# NOTE(review): job.df is filtered only on the 998/999 salary codes, so it
# still contains salary == 0 rows, unlike job11.df -- confirm this is intended.
fit1 <- lm(salary ~ satis + work_yrs + frstlang, data = job.df)
summary(fit1)
##
## Call:
## lm(formula = job.df$salary ~ job.df$satis + job.df$work_yrs +
## job.df$frstlang, data = job.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -70468 -53495 25281 45013 165308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5665.6 29404.9 -0.193 0.8474
## job.df$satis 10983.1 4970.8 2.210 0.0283 *
## job.df$work_yrs -748.6 1033.5 -0.724 0.4697
## job.df$frstlangOthers 5687.9 14333.6 0.397 0.6919
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 52820 on 189 degrees of freedom
## Multiple R-squared: 0.02799, Adjusted R-squared: 0.01256
## F-statistic: 1.814 on 3 and 189 DF, p-value: 0.146
Only satis is a significant variable in model 2 (p = 0.028); work_yrs and frstlang are not significant. The multiple R-squared value indicates that the model accounts for about 2.8% of the variance in salary.
# Model 3: salary regressed on age and sex.
# Columns are referenced by bare name so that `data =` actually supplies them;
# estimates are unchanged, only the coefficient labels lose the `job.df$`
# prefix.
# NOTE(review): job.df is filtered only on the 998/999 salary codes, so it
# still contains salary == 0 rows, unlike job11.df -- confirm this is intended.
fit2 <- lm(salary ~ age + sex, data = job.df)
summary(fit2)
##
## Call:
## lm(formula = job.df$salary ~ job.df$age + job.df$sex, data = job.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63707 -55467 25564 44168 184091
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 99617.1 25493.0 3.908 0.00013 ***
## job.df$age -1635.2 906.9 -1.803 0.07298 .
## job.df$sexFemale 1698.4 8498.3 0.200 0.84181
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 52970 on 190 degrees of freedom
## Multiple R-squared: 0.01716, Adjusted R-squared: 0.006813
## F-statistic: 1.658 on 2 and 190 DF, p-value: 0.1932
Age is only marginally significant in model 3 (p = 0.073: significant at the 10% level but not at the 5% level).
We see that model 2 is better than model 1 and model 3, with a higher R-squared value.
As Model 2 is best fit for us we will consider the following equation.
y = B0 + B1x1 + B2x2 + B3x3, where y is the salary of placed students; the beta coefficients are obtained below.
# Estimated intercept and slopes of model 2.
coefficients(object = fit1)
## (Intercept) job.df$satis job.df$work_yrs
## -5665.5929 10983.1055 -748.6187
## job.df$frstlangOthers
## 5687.8724
Above are the beta coefficients. Hence the model is: salary = B0 + B1(satis) + B2(work_yrs) + B3(frstlang = Others).
# Rows with a recorded salary of 0.
# The former `!= "998"` / `!= "999"` conditions were redundant (a salary equal
# to 0 is never 998 or 999) and compared a numeric column with strings.
# which() drops rows where the comparison is NA.
nojob.df <- mba.df[which(mba.df$salary == 0), ]
head(nojob.df)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 1 23 Female 620 77 87 87 3.4 3.00 1
## 2 24 Male 610 90 71 87 3.5 4.00 1
## 3 24 Male 670 99 78 95 3.3 3.25 1
## 4 24 Male 570 56 81 75 3.3 2.67 1
## 6 24 Male 640 82 89 91 3.9 3.75 1
## 7 25 Male 610 89 74 87 3.4 3.50 1
## work_yrs frstlang salary satis
## 1 2 English 0 7
## 2 2 English 0 6
## 3 2 English 0 6
## 4 1 English 0 7
## 6 2 English 0 6
## 7 2 English 0 5
# Histogram of total GMAT score for the salary == 0 subset.
hist(
  nojob.df$gmat_tot,
  breaks = 10,
  col = "orange",
  main = "GMAT performance of students with no job",
  xlab = "GMAT score"
)
GMAT scores are concentrated between 550 and 650 for unplaced students, while they are more spread out among those who do have a job.
# Chi-squared test of independence between work experience and satisfaction
# within nojob.df.
# R warns that the chi-squared approximation may be incorrect here.
chisq.test(x = nojob.df$work_yrs, y = nojob.df$satis)
## Warning in chisq.test(nojob.df$work_yrs, nojob.df$satis): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: nojob.df$work_yrs and nojob.df$satis
## X-squared = 44.974, df = 48, p-value = 0.5976
With p > 0.05 we fail to reject the null hypothesis: there is no evidence of an association between work experience and satisfaction with the MBA program among unplaced students.
Summary 1. Students' salary depended significantly on work experience and language preference.