Read the data & Load the data file

# Load the MBA starting-salaries survey from the working directory.
# The file name is a single literal, so it is passed directly instead of
# being wrapped in a one-argument paste() call.
mba.df <- read.csv("MBA Starting Salaries Data.csv")

View the data

# Open the data frame in the interactive data viewer.
View(mba.df)
# attach() is deliberately not used: it would place a snapshot of the columns
# on the search path, and that snapshot goes stale as soon as mba.df is
# modified (the sex column is recoded further down). Columns are referenced
# as mba.df$<col> or through a `data =` argument instead.

# psych supplies describe(), which tabulates summary statistics per column.
library(psych)
# Print the first ten statistic columns of the summary for the full data set.
describe(mba.df)[, seq_len(10)]

##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range
## age          26
## sex           1
## gmat_tot    340
## gmat_qpc     71
## gmat_vpc     83
## gmat_tpc     99
## s_avg         2
## f_avg         4
## quarter       3
## work_yrs     22
## frstlang      1
## salary   220000
## satis       997

Check the data type of all variables

# Print the structure of mba.df: its dimensions and, for every column,
# the storage type and the first few values.
str(mba.df)

## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...

As we can see, sex is stored as an integer code (1 = Male, 2 = Female); we convert it to a labelled factor instead

# Recode the integer sex codes (1 = Male, 2 = Female) as a labelled factor in
# a single step, without first coercing the column to character and then
# comparing strings against numbers.
# Levels are listed as Female, Male so the level order matches the
# alphabetical order factor() produced for the previous character recoding.
# Any code other than 1 or 2 becomes NA.
# The guard keeps the step safe to re-run once the column is already a factor.
if (!is.factor(mba.df$sex)) {
  mba.df$sex <- factor(
    mba.df$sex,
    levels = c(2, 1),
    labels = c("Female", "Male")
  )
}

MBAs who got placed and who disclosed their salaries

# Keep the rows whose salary is a real figure, i.e. above 1000; this leaves
# out 0 and the 998/999 codes that appear in the salary column.
# Rows with a missing salary are dropped as well.
placed.df <- mba.df[!is.na(mba.df$salary) & mba.df$salary > 1000, ]
# Inspect the subset in the interactive data viewer.
View(placed.df)

MBAs who were not placed

# Rows with a salary of exactly 0 (missing salaries never match %in%).
notPlaced.df <- mba.df[mba.df$salary %in% 0, ]
# Inspect the subset in the interactive data viewer.
View(notPlaced.df)

MBAs who were placed but did not disclose their salary

# Rows carrying the salary code 999 (missing salaries never match %in%).
notDisclosedSalary.df <- mba.df[mba.df$salary %in% 999, ]
# Inspect the subset in the interactive data viewer.
View(notDisclosedSalary.df)

Summary of all placed students

# psych supplies describe(), which tabulates summary statistics per column.
library(psych)
# Print the first ten statistic columns of the summary for the placed subset.
describe(placed.df)[, seq_len(10)]

##          vars   n      mean       sd   median   trimmed     mad     min
## age         1 103     26.78     3.27 2.60e+01     26.30    2.97    22.0
## sex*        2 103      1.70     0.46 2.00e+00      1.75    0.00     1.0
## gmat_tot    3 103    616.02    50.69 6.20e+02    615.90   59.30   500.0
## gmat_qpc    4 103     79.73    13.39 8.20e+01     81.05   13.34    39.0
## gmat_vpc    5 103     78.56    16.14 8.10e+01     80.33   16.31    30.0
## gmat_tpc    6 103     84.52    11.01 8.70e+01     85.60   11.86    51.0
## s_avg       7 103      3.09     0.38 3.10e+00      3.10    0.44     2.2
## f_avg       8 103      3.09     0.49 3.25e+00      3.13    0.37     0.0
## quarter     9 103      2.26     1.12 2.00e+00      2.20    1.48     1.0
## work_yrs   10 103      3.68     3.01 3.00e+00      3.11    1.48     0.0
## frstlang   11 103      1.07     0.25 1.00e+00      1.00    0.00     1.0
## salary     12 103 103030.74 17868.80 1.00e+05 101065.06 7413.00 64000.0
## satis      13 103      5.88     0.78 6.00e+00      5.89    1.48     3.0
##             max    range
## age          40     18.0
## sex*          2      1.0
## gmat_tot    720    220.0
## gmat_qpc     99     60.0
## gmat_vpc     99     69.0
## gmat_tpc     99     48.0
## s_avg         4      1.8
## f_avg         4      4.0
## quarter       4      3.0
## work_yrs     16     16.0
## frstlang      2      1.0
## salary   220000 156000.0
## satis         7      4.0

Distribution of Salary of placed dataset

# lattice supplies the formula-based histogram() used below.
library(lattice)
# Histogram of the salary column, restricted to the placed subset.
histogram(
  ~ salary,
  data = placed.df,
  col = "blue",
  xlab = "Starting Salary of placed",
  main = "Distribution of Starting Salary"
)

Scatter plot of placed people ( Work Experience)

library(car)

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

# Scatter plot of salary against years of work experience for the placed
# subset. The former `horizontal = TRUE` argument was dropped: scatterplot()
# has no such argument, so it was forwarded to the base graphics calls, each
# of which warned that "horizontal" is not a graphical parameter.
scatterplot(
  salary ~ work_yrs,
  data = placed.df,
  main = "Scatterplot of placed people  ( Work Experience)",
  xlab = "Work Experience of placed",
  ylab = "MBA's Starting Salaries"
)

## Warning in plot.window(...): "horizontal" is not a graphical parameter

## Warning in plot.xy(xy, type, ...): "horizontal" is not a graphical
## parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "horizontal"
## is not a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "horizontal"
## is not a graphical parameter

## Warning in box(...): "horizontal" is not a graphical parameter

## Warning in title(...): "horizontal" is not a graphical parameter

MBA Starting Salaries Assignment

Nikhilan Velumani

December 26, 2017

Read the data & Load the data file

View the data

Check the data type of all variables

As we can see, sex is stored as an integer code (1 = Male, 2 = Female); we convert it to a labelled factor instead

MBAs who got placed and who disclosed their salaries

MBAs who were not placed

MBAs who were placed but did not disclose their salary

Summary of all placed students

Distribution of Salary of placed dataset

Scatter plot of placed people ( Work Experience)