Read the data: load the CSV data file into a data frame
# Load the MBA starting-salaries survey from the CSV file in the working
# directory. The file name is passed directly: wrapping a single string in
# paste() (with any `sep`) returns the same string and only obscures the call.
mba.df <- read.csv("MBA Starting Salaries Data.csv")
View the data
# Open the data frame in the interactive spreadsheet-style viewer.
View(mba.df)
# NOTE(review): attach() puts a copy of the columns on the search path; later
# changes to mba.df (e.g. recoding mba.df$sex) are not reflected in that copy.
# Nothing visible in this section relies on the attached names - confirm
# whether later code does before removing this call.
attach(mba.df)
library(psych)
# Descriptive statistics for every column of mba.df; keep only the first ten
# columns of the summary table.
describe(mba.df)[, 1:10]
## vars n mean sd median trimmed mad min max
## age 1 274 27.36 3.71 27 26.76 2.97 22 48
## sex 2 274 1.25 0.43 1 1.19 0.00 1 2
## gmat_tot 3 274 619.45 57.54 620 618.86 59.30 450 790
## gmat_qpc 4 274 80.64 14.87 83 82.31 14.83 28 99
## gmat_vpc 5 274 78.32 16.86 81 80.33 14.83 16 99
## gmat_tpc 6 274 84.20 14.02 87 86.12 11.86 0 99
## s_avg 7 274 3.03 0.38 3 3.03 0.44 2 4
## f_avg 8 274 3.06 0.53 3 3.09 0.37 0 4
## quarter 9 274 2.48 1.11 2 2.47 1.48 1 4
## work_yrs 10 274 3.87 3.23 3 3.29 1.48 0 22
## frstlang 11 274 1.12 0.32 1 1.02 0.00 1 2
## salary 12 274 39025.69 50951.56 999 33607.86 1481.12 0 220000
## satis 13 274 172.18 371.61 6 91.50 1.48 1 998
## range
## age 26
## sex 1
## gmat_tot 340
## gmat_qpc 71
## gmat_vpc 83
## gmat_tpc 99
## s_avg 2
## f_avg 4
## quarter 3
## work_yrs 22
## frstlang 1
## salary 220000
## satis 997
Check the data type of all variables
# Compactly display the structure of mba.df: each column's type and its first
# few values.
str(mba.df)
## 'data.frame': 274 obs. of 13 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : int 2 1 1 1 2 1 1 2 1 1 ...
## $ gmat_tot: int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc: int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc: int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc: int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang: int 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
As we can see, sex is stored as an integer code (1 or 2); we convert it to a factor with the labels Male and Female instead
# Recode the integer sex codes (1 = Male, 2 = Female) as a labelled factor in
# a single step, instead of overwriting the integer column with strings and
# then comparing those strings against a number via implicit coercion.
# "Female" is listed first so the level order matches the alphabetical order
# that factor() produced from the character values, keeping downstream
# summaries of the factor unchanged.
# The guard keeps the step safe to re-run once the column is already a factor.
if (!is.factor(mba.df$sex)) {
  mba.df$sex <- factor(
    mba.df$sex,
    levels = c(2, 1),
    labels = c("Female", "Male")
  )
}
MBAs who got placed and who disclosed their salaries
# Rows whose salary is greater than 1000, i.e. an actual reported salary.
# Rows with a missing salary are excluded.
placed.df <- mba.df[!is.na(mba.df$salary) & mba.df$salary > 1000, ]
View(placed.df)
MBAs who were not placed
# Rows whose salary is recorded as exactly 0; %in% never yields NA, so rows
# with a missing salary are excluded.
notPlaced.df <- mba.df[mba.df$salary %in% 0, ]
View(notPlaced.df)
MBAs who were placed but did not disclose their salary
# Rows whose salary holds the code 999; %in% never yields NA, so rows with a
# missing salary are excluded.
notDisclosedSalary.df <- mba.df[mba.df$salary %in% 999, ]
View(notDisclosedSalary.df)
Summary of all placed students
library(psych)
# Descriptive statistics for every column of placed.df; keep only the first
# ten columns of the summary table.
describe(placed.df)[, 1:10]
## vars n mean sd median trimmed mad min
## age 1 103 26.78 3.27 2.60e+01 26.30 2.97 22.0
## sex* 2 103 1.70 0.46 2.00e+00 1.75 0.00 1.0
## gmat_tot 3 103 616.02 50.69 6.20e+02 615.90 59.30 500.0
## gmat_qpc 4 103 79.73 13.39 8.20e+01 81.05 13.34 39.0
## gmat_vpc 5 103 78.56 16.14 8.10e+01 80.33 16.31 30.0
## gmat_tpc 6 103 84.52 11.01 8.70e+01 85.60 11.86 51.0
## s_avg 7 103 3.09 0.38 3.10e+00 3.10 0.44 2.2
## f_avg 8 103 3.09 0.49 3.25e+00 3.13 0.37 0.0
## quarter 9 103 2.26 1.12 2.00e+00 2.20 1.48 1.0
## work_yrs 10 103 3.68 3.01 3.00e+00 3.11 1.48 0.0
## frstlang 11 103 1.07 0.25 1.00e+00 1.00 0.00 1.0
## salary 12 103 103030.74 17868.80 1.00e+05 101065.06 7413.00 64000.0
## satis 13 103 5.88 0.78 6.00e+00 5.89 1.48 3.0
## max range
## age 40 18.0
## sex* 2 1.0
## gmat_tot 720 220.0
## gmat_qpc 99 60.0
## gmat_vpc 99 69.0
## gmat_tpc 99 48.0
## s_avg 4 1.8
## f_avg 4 4.0
## quarter 4 3.0
## work_yrs 16 16.0
## frstlang 2 1.0
## salary 220000 156000.0
## satis 7 4.0
Distribution of salary in the placed dataset
library(lattice)
# Lattice histogram of the salary column in placed.df, drawn with blue bars.
histogram(
  ~ salary,
  data = placed.df,
  col = "blue",
  xlab = "Starting Salary of placed",
  main = "Distribution of Starting Salary"
)

Scatter plot of placed people (work experience)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter plot of starting salary against years of work experience for the
# placed subset. No `horizontal` argument is passed: scatterplot() does not
# accept one, so it would fall through to the base graphics calls and only
# raise '"horizontal" is not a graphical parameter' warnings.
scatterplot(
  salary ~ work_yrs,
  data = placed.df,
  main = "Scatterplot of placed people ( Work Experience)",
  xlab = "Work Experience of placed",
  ylab = "MBA's Starting Salaries"
)
