Read the data: load the data file

# Load the MBA starting-salaries data set from the working directory.
# The file name is passed directly; wrapping a single string in paste()
# with a `sep` argument had no effect on the resulting path.
mba.df <- read.csv("MBA Starting Salaries Data.csv")

View the data

# Inspect the raw data frame in the interactive viewer.
View(mba.df)
# NOTE(review): attach() copies the columns onto the search path as they are
# right now; later changes to mba.df are not reflected in the attached copy.
attach(mba.df)
library(psych)
# Descriptive statistics for every variable, keeping the first ten columns
# of the describe() table (vars through range).
describe(mba.df)[, seq_len(10)]
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range
## age          26
## sex           1
## gmat_tot    340
## gmat_qpc     71
## gmat_vpc     83
## gmat_tpc     99
## s_avg         2
## f_avg         4
## quarter       3
## work_yrs     22
## frstlang      1
## salary   220000
## satis       997

Check the data type of all variables

# Show the storage type and the first few values of each column.
utils::str(object = mba.df)
## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...

As we can see, sex is stored as an integer code (1 or 2); we recode it as a factor with the labels Male and Female

# Recode sex from its integer codes (1 = Male, 2 = Female) into a labelled
# factor in one step, without first overwriting the column with strings.
# Levels are listed as Female, Male to match the alphabetical order that
# factor() gives to character labels by default, so downstream summaries
# keep the same level coding. The is.factor() guard makes the chunk safe
# to re-run after the column has already been converted.
if (!is.factor(mba.df$sex)) {
  mba.df$sex <- factor(
    mba.df$sex,
    levels = c(2, 1),
    labels = c("Female", "Male")
  )
}

MBAs who got placed and who disclosed their salaries

# Keep only the rows whose salary is above 1000; rows with a missing salary
# are dropped as well.
placed.df <- subset(mba.df, salary > 1000)
View(placed.df)

MBAs who were not placed

# Rows whose salary is recorded as 0. %in% never yields NA, so rows with a
# missing salary are excluded.
notPlaced.df <- mba.df[mba.df$salary %in% 0, ]
View(notPlaced.df)

MBAs who were placed but did not disclose their salary

# Rows whose salary holds the code 999; rows with a missing salary are
# dropped as well.
notDisclosedSalary.df <- subset(mba.df, salary == 999)
View(notDisclosedSalary.df)

Summary of all placed students

library(psych)
# Descriptive statistics for the placed subset, keeping the first ten
# columns of the describe() table.
psych::describe(x = placed.df)[, seq_len(10)]
##          vars   n      mean       sd   median   trimmed     mad     min
## age         1 103     26.78     3.27 2.60e+01     26.30    2.97    22.0
## sex*        2 103      1.70     0.46 2.00e+00      1.75    0.00     1.0
## gmat_tot    3 103    616.02    50.69 6.20e+02    615.90   59.30   500.0
## gmat_qpc    4 103     79.73    13.39 8.20e+01     81.05   13.34    39.0
## gmat_vpc    5 103     78.56    16.14 8.10e+01     80.33   16.31    30.0
## gmat_tpc    6 103     84.52    11.01 8.70e+01     85.60   11.86    51.0
## s_avg       7 103      3.09     0.38 3.10e+00      3.10    0.44     2.2
## f_avg       8 103      3.09     0.49 3.25e+00      3.13    0.37     0.0
## quarter     9 103      2.26     1.12 2.00e+00      2.20    1.48     1.0
## work_yrs   10 103      3.68     3.01 3.00e+00      3.11    1.48     0.0
## frstlang   11 103      1.07     0.25 1.00e+00      1.00    0.00     1.0
## salary     12 103 103030.74 17868.80 1.00e+05 101065.06 7413.00 64000.0
## satis      13 103      5.88     0.78 6.00e+00      5.89    1.48     3.0
##             max    range
## age          40     18.0
## sex*          2      1.0
## gmat_tot    720    220.0
## gmat_qpc     99     60.0
## gmat_vpc     99     69.0
## gmat_tpc     99     48.0
## s_avg         4      1.8
## f_avg         4      4.0
## quarter       4      3.0
## work_yrs     16     16.0
## frstlang      2      1.0
## salary   220000 156000.0
## satis         7      4.0

Distribution of salary in the placed dataset

library(lattice)
# Lattice histogram of the salary column in placed.df.
histogram(
  ~ salary,
  data = placed.df,
  col = "blue",
  xlab = "Starting Salary of placed",
  main = "Distribution of Starting Salary"
)

Scatter plot of starting salary against work experience for placed students

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Starting salary against years of work experience for the placed students.
# The former `horizontal = TRUE` argument was dropped: it was forwarded to
# the base plotting calls, each of which warned that "horizontal" is not a
# graphical parameter.
scatterplot(
  salary ~ work_yrs,
  data = placed.df,
  main = "Scatterplot of placed people  ( Work Experience)",
  xlab = "Work Experience of placed",
  ylab = "MBA's Starting Salaries"
)