# Load the MBA starting-salaries survey from the working directory.
# The file name is already a single string, so no paste() call is needed.
mba_data <- read.csv("MBA_Starting_Salaries Data.csv")

# View() opens a spreadsheet-style viewer and only makes sense in an
# interactive session; skip it when the script is run in batch or knitted.
if (interactive()) {
  View(mba_data)
}

# Per-column summary statistics (echoed below).
summary(mba_data)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :16.00 Min. : 0.0 Min. :2.000 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.0 1st Qu.:2.708 1st Qu.:2.750
## Median :81.00 Median :87.0 Median :3.000 Median :3.000
## Mean :78.32 Mean :84.2 Mean :3.025 Mean :3.062
## 3rd Qu.:91.00 3rd Qu.:94.0 3rd Qu.:3.300 3rd Qu.:3.250
## Max. :99.00 Max. :99.0 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 0
## 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 0
## Median :2.000 Median : 3.000 Median :1.000 Median : 999
## Mean :2.478 Mean : 3.872 Mean :1.117 Mean : 39026
## 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.: 97000
## Max. :4.000 Max. :22.000 Max. :2.000 Max. :220000
## satis
## Min. : 1.0
## 1st Qu.: 5.0
## Median : 6.0
## Mean :172.2
## 3rd Qu.: 7.0
## Max. :998.0
library(lattice)

# lattice documents the histogram() formula as one-sided (~ x | g1 * g2).
# With the earlier two-sided formulas (satis ~ age, sex ~ salary) only the
# right-hand variable was binned, so the plots showed the distributions of
# age and salary while the axis labels claimed "Satisfaction" and "Sex".
# The formulas and labels below describe what is actually drawn.

# Distribution of age; the y axis keeps lattice's default label.
histogram(~ age, data = mba_data, col = "green", xlab = "Age")

# Distribution of salary, one panel per level of sex, so the two groups can
# be compared side by side.
histogram(~ salary | factor(sex), data = mba_data, col = "blue",
          xlab = "Salary")
Placement parity by sex
# Keep only the rows whose salary is non-zero; this is the "placed" subset
# used by the analysis that follows. subset() evaluates the condition inside
# mba_data, so the column can be named directly.
# NOTE(review): summary(mba_data) reports a median salary of 999 and a satis
# maximum of 998, which look like non-response codes rather than real values.
# If so, such rows are still counted as placed here - confirm against the
# data dictionary before interpreting the results.
mba_placed <- subset(mba_data, salary != 0)

# View() is interactive-only; skip it in batch or knitted runs.
if (interactive()) {
  View(mba_placed)
}
Let us see how many males and females were placed:
# Count the placed students in each sex category and print the counts.
a <- table(mba_placed[["sex"]])
print(a)
##
## 1 2
## 139 45
We run a chi-square goodness-of-fit test on these counts:
# Goodness-of-fit test on the placed counts. The null hypothesis is written
# out explicitly: every sex category is equally likely, which is what
# chisq.test() assumes by default for a one-way table.
chisq.test(a, p = rep(1 / length(a), length(a)))
##
## Chi-squared test for given probabilities
##
## data: a
## X-squared = 48.022, df = 1, p-value = 4.215e-12
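The p-value is far below 0.05, so the result is significant: the placed students are not split evenly between the two sexes. This does not by itself show a placement disparity, because the sexes are not equally represented in the full data either: the mean of sex is 1.248, so roughly a quarter of all students are coded 2, about the same share as among the placed students (45 of 184).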
Now we fit a linear regression model:
# Ordinary least-squares fit with sex as the response and work experience,
# salary, age and satisfaction as predictors, all taken from mba_placed.
# NOTE(review): the table of mba_placed$sex shows only the values 1 and 2,
# so a logistic model (glm with family = binomial) on a 0/1 recoding may
# suit this question better - confirm the intent.
x <- lm(
  formula = mba_placed$sex ~ mba_placed$work_yrs + mba_placed$salary +
    mba_placed$age + mba_placed$satis
)
print(x)
##
## Call:
## lm(formula = mba_placed$sex ~ mba_placed$work_yrs + mba_placed$salary +
## mba_placed$age + mba_placed$satis)
##
## Coefficients:
## (Intercept) mba_placed$work_yrs mba_placed$salary
## 2.099e+00 1.934e-02 8.875e-07
## mba_placed$age mba_placed$satis
## -3.627e-02 -1.022e-05
# x is already the fitted model, so reuse it instead of passing the lm
# object back to lm() as if it were a formula, which only refits the same
# model through an implicit conversion. The estimates are unchanged; only
# the echoed call differs.
model_fit <- x
summary(model_fit)
##
## Call:
## lm(formula = mba_placed$sex ~ mba_placed$work_yrs + mba_placed$salary +
## mba_placed$age + mba_placed$satis)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39688 -0.26988 -0.20496 -0.01683 0.87195
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.099e+00 5.095e-01 4.120 5.78e-05 ***
## mba_placed$work_yrs 1.934e-02 2.363e-02 0.818 0.4142
## mba_placed$salary 8.875e-07 7.888e-07 1.125 0.2621
## mba_placed$age -3.627e-02 2.108e-02 -1.720 0.0871 .
## mba_placed$satis -1.022e-05 9.494e-05 -0.108 0.9144
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4281 on 179 degrees of freedom
## Multiple R-squared: 0.03496, Adjusted R-squared: 0.0134
## F-statistic: 1.621 on 4 and 179 DF, p-value: 0.1708