We will read and summarise the data set.

# Read the MBA starting-salaries data set; the path is relative to the
# current working directory.
mba_data <- read.csv("MBA_Starting_Salaries Data.csv")

# View() opens a data viewer window, which is only useful (and only
# reliably available) in an interactive session, so skip it when the
# script is run non-interactively.
if (interactive()) {
  View(mba_data)
}

# Per-column summary statistics (min, quartiles, mean, max).
summary(mba_data)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0

Now we load the lattice package to visualise the data.

library(lattice)

# Distribution of student ages.
# histogram() bins the variable on the right-hand side of the formula and
# shows the share of observations per bin on the y-axis, so the formula is
# one-sided and the y-axis is labelled as a percentage rather than as
# "Satisfaction" (satis was never what the bars measured).
histogram(
  ~ age,
  data = mba_data,
  col = "green",
  xlab = "Age",
  ylab = "Percent of Total"
)

library(lattice)

# Distribution of salary, drawn as one panel per value of `sex`.
# The binned variable (right-hand side of the formula) is salary, so the
# x-axis is labelled "Salary"; the y-axis of a histogram is the share of
# observations per bin. `sex` is used as a conditioning variable, which is
# how lattice compares a distribution across groups.
histogram(
  ~ salary | factor(sex),
  data = mba_data,
  col = "Blue",
  xlab = "Salary",
  ylab = "Percent of Total"
)

Sex parity in placement

# Keep only the rows whose recorded salary is non-zero; these are treated
# as the placed students in the rest of the analysis.
# NOTE(review): the summary of mba_data reports a median salary of 999,
# which looks like a placeholder code rather than a real salary - confirm
# whether such rows should also be excluded here.
mba_placed <- subset(mba_data, salary != 0)

# View() opens a data viewer window; only do so in an interactive session.
if (interactive()) {
  View(mba_placed)
}

Let us see how many males and females are placed:

# Count the placed students in each `sex` category and show the counts.
a <- table(mba_placed[["sex"]])
print(a)
## 
##   1   2 
## 139  45

We run a chi-square test on these counts:

# One-way chi-squared test on the counts in `a`. No `p` is supplied, so the
# null hypothesis is that the two categories are equally likely.
# NOTE(review): this compares the placed counts with a 50/50 split, not
# with the sex ratio of the whole cohort - confirm that is the intended
# comparison for a parity check.
chisq.test(x = a)
## 
##  Chi-squared test for given probabilities
## 
## data:  a
## X-squared = 48.022, df = 1, p-value = 4.215e-12

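The p-value (4.215e-12) is far below 0.05, so the result is statistically significant: the placed students are not evenly split between the two sexes.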

Now we fit a linear regression model:

# Ordinary least-squares fit of `sex` on years of work experience, salary,
# age and satisfaction, using the placed students only. Printing the fit
# shows the call and the estimated coefficients.
# NOTE(review): the summary of mba_data shows `sex` only ranges over 1-2,
# so this is a linear model on a two-valued outcome - confirm whether a
# logistic model was intended.
x <- lm(
  formula = mba_placed$sex ~ mba_placed$work_yrs + mba_placed$salary +
    mba_placed$age + mba_placed$satis
)
print(x)
## 
## Call:
## lm(formula = mba_placed$sex ~ mba_placed$work_yrs + mba_placed$salary + 
##     mba_placed$age + mba_placed$satis)
## 
## Coefficients:
##         (Intercept)  mba_placed$work_yrs    mba_placed$salary  
##           2.099e+00            1.934e-02            8.875e-07  
##      mba_placed$age     mba_placed$satis  
##          -3.627e-02           -1.022e-05
# Refit the model, passing the already-fitted object `x` as the formula
# argument, then print the full coefficient table with standard errors,
# t-values, p-values, R-squared and the overall F-test.
# NOTE(review): the terms in `x` already spell out `mba_placed$...`, so
# this refit presumably reproduces `x` exactly - verify it is needed.
model_fit <- lm(formula = x, data = mba_placed)
print(summary(model_fit))
## 
## Call:
## lm(formula = x, data = mba_placed)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39688 -0.26988 -0.20496 -0.01683  0.87195 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.099e+00  5.095e-01   4.120 5.78e-05 ***
## mba_placed$work_yrs  1.934e-02  2.363e-02   0.818   0.4142    
## mba_placed$salary    8.875e-07  7.888e-07   1.125   0.2621    
## mba_placed$age      -3.627e-02  2.108e-02  -1.720   0.0871 .  
## mba_placed$satis    -1.022e-05  9.494e-05  -0.108   0.9144    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4281 on 179 degrees of freedom
## Multiple R-squared:  0.03496,    Adjusted R-squared:  0.0134 
## F-statistic: 1.621 on 4 and 179 DF,  p-value: 0.1708