# This is my case study on the Dean's Dilemma project, which investigates the following questions using R.
# Load the Dean's Dilemma data set and take a first look at it.
# stringsAsFactors = TRUE keeps text columns as factors (the default before
# R 4.0), so summary() reports per-level counts as in the recorded output.
deansdilemma.df <- read.csv("Data - Deans Dilemma.csv",
                            stringsAsFactors = TRUE)
# View() opens a data viewer window; only do that in an interactive session so
# the script can also run under Rscript or when knitted.
if (interactive()) {
  View(deansdilemma.df)
}
summary(deansdilemma.df)
## SlNo Gender Gender.B Percent_SSC Board_SSC
## Min. : 1.0 F:127 Min. :0.0000 Min. :37.00 CBSE :113
## 1st Qu.: 98.5 M:264 1st Qu.:0.0000 1st Qu.:56.00 ICSE : 77
## Median :196.0 Median :0.0000 Median :64.50 Others:201
## Mean :196.0 Mean :0.3248 Mean :64.65
## 3rd Qu.:293.5 3rd Qu.:1.0000 3rd Qu.:74.00
## Max. :391.0 Max. :1.0000 Max. :87.20
##
## Board_CBSE Board_ICSE Percent_HSC Board_HSC
## Min. :0.000 Min. :0.0000 Min. :40.0 CBSE : 96
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:54.0 ISC : 48
## Median :0.000 Median :0.0000 Median :63.0 Others:247
## Mean :0.289 Mean :0.1969 Mean :63.8
## 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:72.0
## Max. :1.000 Max. :1.0000 Max. :94.7
##
## Stream_HSC Percent_Degree Course_Degree
## Arts : 18 Min. :35.00 Arts : 13
## Commerce:222 1st Qu.:57.52 Commerce :117
## Science :151 Median :63.00 Computer Applications: 32
## Mean :62.98 Engineering : 37
## 3rd Qu.:69.00 Management :163
## Max. :89.00 Others : 5
## Science : 24
## Degree_Engg Experience_Yrs Entrance_Test S.TEST
## Min. :0.00000 Min. :0.0000 MAT :265 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 None : 67 1st Qu.:1.0000
## Median :0.00000 Median :0.0000 K-MAT : 24 Median :1.0000
## Mean :0.09463 Mean :0.4783 CAT : 22 Mean :0.8286
## 3rd Qu.:0.00000 3rd Qu.:1.0000 PGCET : 8 3rd Qu.:1.0000
## Max. :1.00000 Max. :3.0000 GCET : 2 Max. :1.0000
## (Other): 3
## Percentile_ET S.TEST.SCORE Percent_MBA
## Min. : 0.00 Min. : 0.00 Min. :50.83
## 1st Qu.:41.19 1st Qu.:41.19 1st Qu.:57.20
## Median :62.00 Median :62.00 Median :61.01
## Mean :54.93 Mean :54.93 Mean :61.67
## 3rd Qu.:78.00 3rd Qu.:78.00 3rd Qu.:66.02
## Max. :98.69 Max. :98.69 Max. :77.89
##
## Specialization_MBA Marks_Communication Marks_Projectwork
## Marketing & Finance:222 Min. :50.00 Min. :50.00
## Marketing & HR :156 1st Qu.:53.00 1st Qu.:64.00
## Marketing & IB : 13 Median :58.00 Median :69.00
## Mean :60.54 Mean :68.36
## 3rd Qu.:67.00 3rd Qu.:74.00
## Max. :88.00 Max. :87.00
##
## Marks_BOCA Placement Placement_B Salary
## Min. :50.00 Not Placed: 79 Min. :0.000 Min. : 0
## 1st Qu.:57.00 Placed :312 1st Qu.:1.000 1st Qu.:172800
## Median :63.00 Median :1.000 Median :240000
## Mean :64.38 Mean :0.798 Mean :219078
## 3rd Qu.:72.50 3rd Qu.:1.000 3rd Qu.:300000
## Max. :96.00 Max. :1.000 Max. :940000
##
library(psych)
# Descriptive statistics (psych::describe) for each score column and for
# Salary. The console output recorded for each call follows it as `##` lines.
describe(deansdilemma.df[["Percent_SSC"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 391 64.65 10.96 64.5 64.76 12.6 37 87.2 50.2 -0.06 -0.72
## se
## X1 0.55
describe(deansdilemma.df[["Percent_HSC"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 391 63.8 11.42 63 63.34 13.34 40 94.7 54.7 0.29 -0.67
## se
## X1 0.58
describe(deansdilemma.df[["Percent_Degree"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 391 62.98 8.92 63 62.91 8.9 35 89 54 0.05 0.24 0.45
describe(deansdilemma.df[["Percentile_ET"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 391 54.93 31.17 62 56.87 25.2 0 98.69 98.69 -0.74 -0.69
## se
## X1 1.58
describe(deansdilemma.df[["S.TEST.SCORE"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 391 54.93 31.17 62 56.87 25.2 0 98.69 98.69 -0.74 -0.69
## se
## X1 1.58
describe(deansdilemma.df[["Percent_MBA"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 391 61.67 5.85 61.01 61.45 6.39 50.83 77.89 27.06 0.34 -0.52
## se
## X1 0.3
describe(deansdilemma.df[["Marks_Communication"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 391 60.54 8.82 58 59.68 8.9 50 88 38 0.74 -0.25 0.45
describe(deansdilemma.df[["Marks_Projectwork"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 391 68.36 7.15 69 68.6 7.41 50 87 37 -0.26 -0.27
## se
## X1 0.36
describe(deansdilemma.df[["Marks_BOCA"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 391 64.38 9.58 63 64.08 11.86 50 96 46 0.29 -0.85
## se
## X1 0.48
describe(deansdilemma.df[["Salary"]])
## vars n mean sd median trimmed mad min max range skew
## X1 1 391 219078.3 138311.6 240000 217011.5 88956 0 940000 940000 0.24
## kurtosis se
## X1 1.74 6994.72
# Median salary across every row of the data set.
median(deansdilemma.df[["Salary"]])
## [1] 240000
# Count students per placement status, then show the same table as percentages.
mytable <- table(Placement = deansdilemma.df[["Placement"]])
mytable
## Placement
## Not Placed Placed
## 79 312
100 * prop.table(mytable)
## Placement
## Not Placed Placed
## 20.2046 79.7954
# Keep only the students who were placed (Placement_B == 1). which() also
# drops any rows where the flag is NA.
placed.df <- deansdilemma.df[which(deansdilemma.df$Placement_B == 1), ]
# Only open the data viewer in an interactive session so the script can also
# run under Rscript or when knitted.
if (interactive()) {
  View(placed.df)
}
# Median salary among placed students.
median(placed.df$Salary)
## [1] 260000
# Mean salary of placed students, split by gender.
aggregate(placed.df$Salary, by = list(Gender = placed.df$Gender), FUN = mean)
## Gender x
## 1 F 253068.0
## 2 M 284241.9
# Histogram of MBA percentages for the placed students, with fixed axis ranges.
hist(
  placed.df[["Percent_MBA"]],
  breaks = 3,
  col = "lightblue",
  xlim = c(50, 80),
  ylim = c(0, 150),
  main = "MBA Performance of placed students",
  xlab = "MBA Percentage",
  ylab = "Count"
)
# Students who were not placed (Placement_B == 0). which() also drops any rows
# where the flag is NA.
notplaced.df <- deansdilemma.df[which(deansdilemma.df$Placement_B == 0), ]
# Only open the data viewer in an interactive session so the script can also
# run under Rscript or when knitted.
if (interactive()) {
  View(notplaced.df)
}

# Stack the two histograms in a 2 x 1 grid. par() has to be called as a
# function for the layout to take effect; it returns the previous settings,
# which are restored once both panels are drawn.
old_par <- par(mfrow = c(2, 1))
hist(placed.df$Percent_MBA,
     main = "MBA Performance of placed students",
     xlab = "MBA Percentage",
     ylab = "Count",
     xlim = c(50, 80), ylim = c(0, 150),
     breaks = 3,
     col = "lightblue")
hist(notplaced.df$Percent_MBA,
     main = "MBA Performance of not placed students",
     xlab = "MBA Percentage",
     ylab = "Count",
     xlim = c(50, 80),
     breaks = 3,
     col = "lightblue")
par(old_par)
# Horizontal box plots of salary by gender for the placed students.
# With horizontal = TRUE the salary scale runs along the x axis and the groups
# sit on the y axis, so the axis titles are assigned accordingly.
# names= labels the two groups directly; the order follows the Gender levels
# (F, then M), so no separate axis() call is needed.
boxplot(Salary ~ Gender, data = placed.df, horizontal = TRUE,
        names = c("Females", "Males"),
        xlab = "Salary", ylab = "Gender",
        main = "Comparison of Salaries of Males and Females")
# Placed students (Placement_B == 1) with S.TEST == 1, presumably those who
# sat an entrance test. which() also drops rows where either flag is NA.
placedET.df <- deansdilemma.df[
  which(deansdilemma.df$Placement_B == 1 & deansdilemma.df$S.TEST == 1),
]
# Only open the data viewer in an interactive session so the script can also
# run under Rscript or when knitted.
if (interactive()) {
  View(placedET.df)
}
library(car)
# car >= 3.0 expects the diagonal panels as list(method = ...); a bare string
# is ignored there with a warning. Older releases take the string itself.
diag_panels <- if (utils::packageVersion("car") >= "3.0.0") {
  list(method = "density")
} else {
  "density"
}
# Pairwise scatterplots of salary, MBA percentage and entrance-test percentile,
# with density estimates on the diagonal.
scatterplotMatrix(~ Salary + Percent_MBA + Percentile_ET,
                  data = placedET.df, diagonal = diag_panels, cex = 0.6)