TASK 2b - Reading the dataset
# Read the dataset; the relative path is resolved against the working directory.
deanDilemma <- read.csv("Data - Deans Dilemma.csv")
# View() opens a data viewer, so only call it in an interactive session. This
# keeps the script usable when it is run with Rscript or knitted.
if (interactive()) {
  View(deanDilemma)
}
TASK 2c - Summarize the dataset
# Summarize the score and salary columns. Columns are selected by name rather
# than by position so the call keeps working if the CSV column order changes.
summary(deanDilemma[c(
  "Percent_SSC", "Percent_HSC", "Percent_Degree", "Percentile_ET",
  "S.TEST.SCORE", "Percent_MBA", "Marks_Communication", "Marks_Projectwork",
  "Marks_BOCA", "Salary"
)])
## Percent_SSC Percent_HSC Percent_Degree Percentile_ET
## Min. :37.00 Min. :40.0 Min. :35.00 Min. : 0.00
## 1st Qu.:56.00 1st Qu.:54.0 1st Qu.:57.52 1st Qu.:41.19
## Median :64.50 Median :63.0 Median :63.00 Median :62.00
## Mean :64.65 Mean :63.8 Mean :62.98 Mean :54.93
## 3rd Qu.:74.00 3rd Qu.:72.0 3rd Qu.:69.00 3rd Qu.:78.00
## Max. :87.20 Max. :94.7 Max. :89.00 Max. :98.69
## S.TEST.SCORE Percent_MBA Marks_Communication Marks_Projectwork
## Min. : 0.00 Min. :50.83 Min. :50.00 Min. :50.00
## 1st Qu.:41.19 1st Qu.:57.20 1st Qu.:53.00 1st Qu.:64.00
## Median :62.00 Median :61.01 Median :58.00 Median :69.00
## Mean :54.93 Mean :61.67 Mean :60.54 Mean :68.36
## 3rd Qu.:78.00 3rd Qu.:66.02 3rd Qu.:67.00 3rd Qu.:74.00
## Max. :98.69 Max. :77.89 Max. :88.00 Max. :87.00
## Marks_BOCA Salary
## Min. :50.00 Min. : 0
## 1st Qu.:57.00 1st Qu.:172800
## Median :63.00 Median :240000
## Mean :64.38 Mean :219078
## 3rd Qu.:72.50 3rd Qu.:300000
## Max. :96.00 Max. :940000
library(psych)
# Descriptive statistics (psych::describe) for the score and salary columns.
# Columns are selected by name rather than by position so the call keeps
# working if the CSV column order changes.
describe(deanDilemma[c(
  "Percent_SSC", "Percent_HSC", "Percent_Degree", "Percentile_ET",
  "S.TEST.SCORE", "Percent_MBA", "Marks_Communication", "Marks_Projectwork",
  "Marks_BOCA", "Salary"
)])
## vars n mean sd median trimmed
## Percent_SSC 1 391 64.65 10.96 64.50 64.76
## Percent_HSC 2 391 63.80 11.42 63.00 63.34
## Percent_Degree 3 391 62.98 8.92 63.00 62.91
## Percentile_ET 4 391 54.93 31.17 62.00 56.87
## S.TEST.SCORE 5 391 54.93 31.17 62.00 56.87
## Percent_MBA 6 391 61.67 5.85 61.01 61.45
## Marks_Communication 7 391 60.54 8.82 58.00 59.68
## Marks_Projectwork 8 391 68.36 7.15 69.00 68.60
## Marks_BOCA 9 391 64.38 9.58 63.00 64.08
## Salary 10 391 219078.26 138311.65 240000.00 217011.50
## mad min max range skew kurtosis
## Percent_SSC 12.60 37.00 87.20 50.20 -0.06 -0.72
## Percent_HSC 13.34 40.00 94.70 54.70 0.29 -0.67
## Percent_Degree 8.90 35.00 89.00 54.00 0.05 0.24
## Percentile_ET 25.20 0.00 98.69 98.69 -0.74 -0.69
## S.TEST.SCORE 25.20 0.00 98.69 98.69 -0.74 -0.69
## Percent_MBA 6.39 50.83 77.89 27.06 0.34 -0.52
## Marks_Communication 8.90 50.00 88.00 38.00 0.74 -0.25
## Marks_Projectwork 7.41 50.00 87.00 37.00 -0.26 -0.27
## Marks_BOCA 11.86 50.00 96.00 46.00 0.29 -0.85
## Salary 88956.00 0.00 940000.00 940000.00 0.24 1.74
## se
## Percent_SSC 0.55
## Percent_HSC 0.58
## Percent_Degree 0.45
## Percentile_ET 1.58
## S.TEST.SCORE 1.58
## Percent_MBA 0.30
## Marks_Communication 0.45
## Marks_Projectwork 0.36
## Marks_BOCA 0.48
## Salary 6994.72
TASK 3a median salary of all the students:
# Median of the Salary column over every row of the dataset.
median(deanDilemma[["Salary"]])
## [1] 240000
TASK 3b percentage of students who were placed:
# Tabulate the 0/1 placement indicator and convert the counts to percentages.
p <- table(deanDilemma$Placement_B)
percentagePlaced <- prop.table(p) * 100
# Pick the placed share by its level name ("1") instead of by position, so the
# lookup does not depend on how many levels the table has or their order.
format(percentagePlaced["1"], digits = 2, nsmall = 2)
## 1
## "79.80"
TASK 3c Subset of placed students:
# Keep only the rows whose Placement value is "Placed" (rows where the
# comparison is NA are dropped).
placed <- subset(deanDilemma, Placement == "Placed")
TASK 3d Median salary of placed students:
# Median of the Salary column within the placed subset.
median(placed[["Salary"]])
## [1] 260000
TASK 3e Mean salary, by gender, of students who were placed:
# Mean salary of placed students, grouped by the Gender column. The default
# (non-formula) interface is used, so the result columns are Group.1 and x.
aggregate(
  x = placed$Salary,
  by = list(placed$Gender),
  FUN = mean
)
## Group.1 x
## 1 F 253068.0
## 2 M 284241.9
TASK 3f
MBA performance of placed students:
# Histogram of MBA percentage for placed students; breaks = 3 is passed to
# hist() as the requested number of cells.
hist(
  placed$Percent_MBA,
  breaks = 3,
  main = " MBA Performance of placed students",
  xlab = "MBA Percentage",
  ylab = "Counts",
  col = "grey"
)
TASK 3g Not placed students dataset
# Rows whose placement indicator is 0. %in% never yields NA, so rows with a
# missing indicator are excluded.
notplaced <- deanDilemma[deanDilemma$Placement_B %in% 0, ]
# Number of students in the not-placed subset.
nrow(notplaced)
## [1] 79
TASK 3h Visually comparing the MBA performance of placed vs. not placed students:
# Draw the two histograms side by side (1 row, 2 columns). par() returns the
# previous settings, which are kept so the layout can be restored afterwards
# instead of assuming it was 1x1.
previousPar <- par(mfrow = c(1, 2))
hist(
  placed$Percent_MBA,
  breaks = 3,
  xlab = "MBA Percentage",
  ylab = "Counts",
  main = " MBA Performance of placed students",
  col = "skyblue"
)
hist(
  notplaced$Percent_MBA,
  breaks = 3,
  xlab = "MBA Percentage",
  ylab = "Counts",
  main = " MBA Performance of not placed students",
  col = "skyblue"
)
# Restore the plotting layout that was active before this comparison.
par(previousPar)
TASK 3i comparing the distribution of salaries of males and females who were placed:
library(lattice)
# Horizontal box-and-whisker plot of Salary for each Gender among placed
# students (lattice::bwplot).
bwplot(
  Gender ~ Salary,
  data = placed,
  horizontal = TRUE,
  main = "Comparison of Salaries of Male and Female",
  col = "black",
  ylab = "Gender"
)
TASK 3j
# Rows where both the S.TEST flag and the placement indicator equal 1; which()
# drops rows where either comparison is NA.
placedET <- deanDilemma[
  which(deanDilemma$S.TEST == 1 & deanDilemma$Placement_B == 1),
]
TASK 3k
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Pairwise scatter plots (car::scatterplotMatrix) of Salary, Percent_MBA and
# Percentile_ET for the placedET subset.
scatterplotMatrix(
  ~ Salary + Percent_MBA + Percentile_ET,
  data = placedET,
  cex = 0.6,
  main = "Scatter Plot Matrix"
)