# ..... MBA STARTING SALARIES .....
# Under the guidance of Prof. Sameer Mathur (Ph.D., Carnegie Mellon University), IIM Lucknow
# Read the MBA starting-salaries data from the CSV file (path is relative to
# the current working directory).
mba.df <- read.csv("MBA Starting Salaries Data.csv")

# View() opens a spreadsheet-style viewer, which needs a GUI; only call it when
# the script is run interactively so batch runs (e.g. Rscript) are not blocked.
if (interactive()) {
  View(mba.df)
}

# Per-column summary statistics (min, quartiles, mean, max) of the data.
summary(mba.df)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :16.00 Min. : 0.0 Min. :2.000 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.0 1st Qu.:2.708 1st Qu.:2.750
## Median :81.00 Median :87.0 Median :3.000 Median :3.000
## Mean :78.32 Mean :84.2 Mean :3.025 Mean :3.062
## 3rd Qu.:91.00 3rd Qu.:94.0 3rd Qu.:3.300 3rd Qu.:3.250
## Max. :99.00 Max. :99.0 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 0
## 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 0
## Median :2.000 Median : 3.000 Median :1.000 Median : 999
## Mean :2.478 Mean : 3.872 Mean :1.117 Mean : 39026
## 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.: 97000
## Max. :4.000 Max. :22.000 Max. :2.000 Max. :220000
## satis
## Min. : 1.0
## 1st Qu.: 5.0
## Median : 6.0
## Mean :172.2
## 3rd Qu.: 7.0
## Max. :998.0
# Data types: show the structure of the data frame (column names, storage
# types and the first few values of each column).
# NOTE(review): the recorded output below shows values such as 998 and 999 in
# `salary` and `satis`; these look like placeholder codes rather than real
# measurements -- confirm against the data dictionary before modelling.
str(mba.df)
## 'data.frame': 274 obs. of 13 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : int 2 1 1 1 2 1 1 2 1 1 ...
## $ gmat_tot: int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc: int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc: int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc: int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang: int 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
# Univariate plots: a boxplot per variable and, for the GMAT measures and the
# grade averages, a histogram as well.

# Age (years).
boxplot(mba.df$age, horizontal = TRUE, main = "Age", xlab = "years",
        col = "green")

# GMAT total score.
boxplot(mba.df$gmat_tot, horizontal = TRUE, main = "GMAT TOTAL",
        xlab = "score", col = "green")
# gmat_tot is the total score (not a percentile), so the x axis is labelled
# "score", matching the boxplot above.
hist(mba.df$gmat_tot, xlab = "score", main = "GMAT TOTAL", col = "yellow")

# GMAT quantitative percentile.
boxplot(mba.df$gmat_qpc, horizontal = TRUE,
        main = "GMAT Quantitative percentile", xlab = "percentile",
        col = "green")
hist(mba.df$gmat_qpc, xlab = "percentile",
     main = "GMAT quantitative percentile", col = "yellow")

# GMAT verbal percentile.
boxplot(mba.df$gmat_vpc, horizontal = TRUE, main = "GMAT Verbal percentile",
        xlab = "percentile", col = "green")
hist(mba.df$gmat_vpc, xlab = "percentile", main = "GMAT verbal percentile",
     col = "yellow")

# GMAT overall percentile.
boxplot(mba.df$gmat_tpc, horizontal = TRUE, main = "Total percentile",
        xlab = "percentile", col = "green")
# Titled as a percentile so it is not confused with the GMAT total-score
# histogram above.
hist(mba.df$gmat_tpc, xlab = "total percentile",
     main = "GMAT total percentile", col = "yellow")

# Spring and fall MBA grade averages, drawn side by side. par() returns the
# previous settings, which are kept so the layout can be restored afterwards.
old.par <- par(mfrow = c(1, 2))
boxplot(mba.df$s_avg, horizontal = FALSE, main = "Spring MBA average",
        ylab = "grade", col = "green")
boxplot(mba.df$f_avg, horizontal = FALSE, main = "Fall MBA average",
        ylab = "grade", col = "green")

# Histograms of the same two averages, again side by side on a fresh page.
par(mfrow = c(1, 2))
hist(mba.df$s_avg, xlab = "Grade", main = "Spring MBA average", col = "yellow")
hist(mba.df$f_avg, xlab = "Grade", main = "Fall MBA average", col = "yellow")

# Restore the previous layout so the plots that follow are not squeezed into
# one half of a 1x2 grid.
par(old.par)

# Salary distribution for each level of work experience (in years).
# NOTE(review): `salary` still contains the values 0, 998 and 999 at this
# point (see the str() output above); confirm whether they should be plotted.
boxplot(
  salary ~ work_yrs,
  data = mba.df,
  xlab = "Work Experience(in years)",
  ylab = "Salary",
  main = "Work experience vs Salary",
  col = "green"
)

# Salary distribution for each quartile ranking.
boxplot(
  salary ~ quarter,
  data = mba.df,
  xlab = "Quartile Rankings",
  ylab = "Salary",
  main = "Quartile Ranking vs Salary",
  col = "green"
)

# Scatterplot matrix (car package) of the numeric measures against each other
# and against salary.
library(car)
scatterplotMatrix(
  ~ age + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + work_yrs + salary,
  data = mba.df,
  cex = 1
)

# Corrgram of all columns of the MBA starting-salaries data: shaded cells in
# the lower triangle, pie glyphs in the upper triangle, variable names on the
# diagonal, with variables reordered (order = TRUE).
library(corrgram)
cols <- colorRampPalette(c("red", "blue", "yellow", "darkgreen"))
corrgram(
  mba.df,
  order = TRUE,
  col.regions = cols,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of MBA Starting Salaries"
)

# Variance matrix. Called on a whole data frame, var() returns the full
# variance-covariance matrix of the columns: variances on the diagonal,
# pairwise covariances elsewhere (see the recorded output below).
var(mba.df)
## age sex gmat_tot gmat_qpc
## age 1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex -4.513248e-02 1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00 3.310688e+03 6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00 6.200233e+02 2.210731e+02
## gmat_vpc -2.763643e+00 5.463758e-01 7.260006e+02 3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02 6.839911e+02 1.357997e+02
## s_avg 2.116874e-01 2.096227e-02 2.480257e+00 -1.691233e-01
## f_avg -3.399348e-02 2.082698e-02 3.154688e+00 5.753854e-01
## quarter -2.045935e-01 -6.414267e-02 -5.891153e+00 6.001979e-01
## work_yrs 1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang 6.796610e-02 2.138980e-04 -2.499933e+00 6.646346e-01
## salary -1.183042e+04 1.518264e+03 -1.611600e+05 -3.335823e+04
## satis -1.763499e+02 -8.780808e+00 1.765263e+03 3.348371e+02
## gmat_vpc gmat_tpc s_avg f_avg
## age -2.7636427 -8.8399775 0.21168739 -0.03399348
## sex 0.5463758 -0.0490896 0.02096227 0.02082698
## gmat_tot 726.0006417 683.9910698 2.48025721 3.15468838
## gmat_qpc 38.1482581 135.7996845 -0.16912329 0.57538542
## gmat_vpc 284.2481217 157.4932488 1.31357023 0.67207000
## gmat_tpc 157.4932488 196.6057057 0.62710008 0.58698618
## s_avg 1.3135702 0.6271001 0.14521760 0.11016898
## f_avg 0.6720700 0.5869862 0.11016898 0.27567237
## quarter -3.2676666 -1.2923719 -0.32237213 -0.26080880
## work_yrs -3.6181653 -7.8575172 0.15926392 -0.06628700
## frstlang -2.1145691 -0.4663244 -0.01671372 -0.00626026
## salary -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis 392.3562739 484.2466779 -4.62884495 2.12532927
## quarter work_yrs frstlang salary
## age -2.045935e-01 10.29493864 6.796610e-02 -1.183042e+04
## sex -6.414267e-02 -0.01580172 2.138980e-04 1.518264e+03
## gmat_tot -5.891153e+00 -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc 6.001979e-01 -11.37186171 6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00 -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00 -7.85751718 -4.663244e-01 3.522750e+03
## s_avg -3.223721e-01 0.15926392 -1.671372e-02 2.831601e+03
## f_avg -2.608088e-01 -0.06628700 -6.260260e-03 7.876560e+02
## quarter 1.232119e+00 -0.30866822 3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01 10.44882490 -2.898318e-02 1.486147e+03
## frstlang 3.553381e-02 -0.02898318 1.035266e-01 -1.419586e+03
## salary -9.296214e+03 1486.14704152 -1.419586e+03 2.596062e+09
## satis -5.227133e-03 -131.24080907 9.484532e+00 -6.347115e+06
## satis
## age -1.763499e+02
## sex -8.780808e+00
## gmat_tot 1.765263e+03
## gmat_qpc 3.348371e+02
## gmat_vpc 3.923563e+02
## gmat_tpc 4.842467e+02
## s_avg -4.628845e+00
## f_avg 2.125329e+00
## quarter -5.227133e-03
## work_yrs -1.312408e+02
## frstlang 9.484532e+00
## salary -6.347115e+06
## satis 1.380974e+05
# Covariance matrix of all columns.
# NOTE(review): the output recorded below is identical to the var(mba.df)
# output above, so one of the two calls is redundant -- confirm whether a
# correlation matrix was intended for one of them.
cov(mba.df)
## age sex gmat_tot gmat_qpc
## age 1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex -4.513248e-02 1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00 3.310688e+03 6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00 6.200233e+02 2.210731e+02
## gmat_vpc -2.763643e+00 5.463758e-01 7.260006e+02 3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02 6.839911e+02 1.357997e+02
## s_avg 2.116874e-01 2.096227e-02 2.480257e+00 -1.691233e-01
## f_avg -3.399348e-02 2.082698e-02 3.154688e+00 5.753854e-01
## quarter -2.045935e-01 -6.414267e-02 -5.891153e+00 6.001979e-01
## work_yrs 1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang 6.796610e-02 2.138980e-04 -2.499933e+00 6.646346e-01
## salary -1.183042e+04 1.518264e+03 -1.611600e+05 -3.335823e+04
## satis -1.763499e+02 -8.780808e+00 1.765263e+03 3.348371e+02
## gmat_vpc gmat_tpc s_avg f_avg
## age -2.7636427 -8.8399775 0.21168739 -0.03399348
## sex 0.5463758 -0.0490896 0.02096227 0.02082698
## gmat_tot 726.0006417 683.9910698 2.48025721 3.15468838
## gmat_qpc 38.1482581 135.7996845 -0.16912329 0.57538542
## gmat_vpc 284.2481217 157.4932488 1.31357023 0.67207000
## gmat_tpc 157.4932488 196.6057057 0.62710008 0.58698618
## s_avg 1.3135702 0.6271001 0.14521760 0.11016898
## f_avg 0.6720700 0.5869862 0.11016898 0.27567237
## quarter -3.2676666 -1.2923719 -0.32237213 -0.26080880
## work_yrs -3.6181653 -7.8575172 0.15926392 -0.06628700
## frstlang -2.1145691 -0.4663244 -0.01671372 -0.00626026
## salary -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis 392.3562739 484.2466779 -4.62884495 2.12532927
## quarter work_yrs frstlang salary
## age -2.045935e-01 10.29493864 6.796610e-02 -1.183042e+04
## sex -6.414267e-02 -0.01580172 2.138980e-04 1.518264e+03
## gmat_tot -5.891153e+00 -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc 6.001979e-01 -11.37186171 6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00 -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00 -7.85751718 -4.663244e-01 3.522750e+03
## s_avg -3.223721e-01 0.15926392 -1.671372e-02 2.831601e+03
## f_avg -2.608088e-01 -0.06628700 -6.260260e-03 7.876560e+02
## quarter 1.232119e+00 -0.30866822 3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01 10.44882490 -2.898318e-02 1.486147e+03
## frstlang 3.553381e-02 -0.02898318 1.035266e-01 -1.419586e+03
## salary -9.296214e+03 1486.14704152 -1.419586e+03 2.596062e+09
## satis -5.227133e-03 -131.24080907 9.484532e+00 -6.347115e+06
## satis
## age -1.763499e+02
## sex -8.780808e+00
## gmat_tot 1.765263e+03
## gmat_qpc 3.348371e+02
## gmat_vpc 3.923563e+02
## gmat_tpc 4.842467e+02
## s_avg -4.628845e+00
## f_avg 2.125329e+00
## quarter -5.227133e-03
## work_yrs -1.312408e+02
## frstlang 9.484532e+00
## salary -6.347115e+06
## satis 1.380974e+05
# Keep only the graduates who actually got a job, taken here to be the rows
# whose salary is above 1000, and print a random handful of them with
# car::some().
library(car)
selected <- subset(mba.df, salary > 1000)
some(selected)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 45 26 2 590 56 89 81 3.3 3.25 1
## 51 26 1 570 68 74 75 3.8 3.50 1
## 128 25 2 620 89 74 87 3.1 3.50 2
## 132 27 1 620 81 87 89 3.0 3.00 2
## 138 26 1 650 89 87 93 3.2 3.25 2
## 191 29 2 560 64 71 72 2.9 3.00 3
## 256 24 2 560 55 78 71 3.5 3.25 4
## 262 25 1 660 99 71 95 3.4 3.25 4
## 265 24 1 570 75 62 75 2.3 2.50 4
## 272 25 1 540 79 45 65 2.6 2.50 4
## work_yrs frstlang salary satis
## 45 4 1 96000 5
## 51 3 1 105000 6
## 128 2 1 101000 5
## 132 3 1 105000 5
## 138 4 1 130000 7
## 191 5 1 95000 7
## 256 2 1 64000 7
## 262 2 1 92000 7
## 265 2 1 98000 6
## 272 3 1 115000 5
# Regression models: starting salary explained by the four GMAT measures.

# Fit on `selected` (rows with salary > 1000, built earlier in this script) so
# the coefficients are not driven by the 0 / 998 / 999 entries that `salary`
# holds for people without a reported salary.
model.placed <- lm(salary ~ gmat_qpc + gmat_tot + gmat_tpc + gmat_vpc,
                   data = selected)
summary(model.placed)

# Original fit on every row of mba.df. Kept unchanged and kept last, so the
# recorded output that follows this call still corresponds to model1.
model1 <- lm(salary ~ gmat_qpc + gmat_tot + gmat_tpc + gmat_vpc,
             data = mba.df)
summary(model1)
##
## Call:
## lm(formula = salary ~ gmat_qpc + gmat_tot + gmat_tpc + gmat_vpc,
## data = mba.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -48199 -41195 -33034 56735 182897
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 141539.0 59303.9 2.387 0.0177 *
## gmat_qpc 465.7 615.2 0.757 0.4497
## gmat_tot -369.7 222.7 -1.660 0.0980 .
## gmat_tpc 523.2 443.0 1.181 0.2386
## gmat_vpc 573.4 563.0 1.018 0.3094
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50900 on 269 degrees of freedom
## Multiple R-squared: 0.01651, Adjusted R-squared: 0.001889
## F-statistic: 1.129 on 4 and 269 DF, p-value: 0.343