Apurva (PGP32242)
04 October 2017
# Load DeansDilemmaData.csv and print a per-column summary.
# The data directory is named once and the file path is built from it, so the
# script no longer changes the session's working directory with setwd().
library(readr)
data_dir <- "~/Downloads/IIM Lucknow/TERM 5/DAM"
DeansDilemmaData <- read_csv(file.path(data_dir, "DeansDilemmaData.csv"))
summary(DeansDilemmaData)
SlNo Gender Percent_SSC Board_SSC
Min. : 1.0 Length:391 Min. :37.00 Length:391
1st Qu.: 98.5 Class :character 1st Qu.:56.00 Class :character
Median :196.0 Mode :character Median :64.50 Mode :character
Mean :196.0 Mean :64.65
3rd Qu.:293.5 3rd Qu.:74.00
Max. :391.0 Max. :87.20
Board_CBSE Board_ICSE Percent_HSC Board_HSC
Length:391 Length:391 Min. :40.0 Length:391
Class :character Class :character 1st Qu.:54.0 Class :character
Mode :character Mode :character Median :63.0 Mode :character
Mean :63.8
3rd Qu.:72.0
Max. :94.7
Stream_HSC Percent_Degree Course_Degree Degree_Engg
Length:391 Min. :35.00 Length:391 Length:391
Class :character 1st Qu.:57.52 Class :character Class :character
Mode :character Median :63.00 Mode :character Mode :character
Mean :62.98
3rd Qu.:69.00
Max. :89.00
Experience_Yrs Entrance_Test S.TEST Percentile_ET
Min. :0.0000 Length:391 Min. :0.0000 Min. : 0.00
1st Qu.:0.0000 Class :character 1st Qu.:1.0000 1st Qu.:41.19
Median :0.0000 Mode :character Median :1.0000 Median :62.00
Mean :0.4783 Mean :0.8286 Mean :54.93
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:78.00
Max. :3.0000 Max. :1.0000 Max. :98.69
S.TEST.SCORE Percent_MBA Specialization_MBA Marks_Communication
Min. : 0.00 Min. :50.83 Length:391 Min. :50.00
1st Qu.:41.19 1st Qu.:57.20 Class :character 1st Qu.:53.00
Median :62.00 Median :61.01 Mode :character Median :58.00
Mean :54.93 Mean :61.67 Mean :60.54
3rd Qu.:78.00 3rd Qu.:66.02 3rd Qu.:67.00
Max. :98.69 Max. :77.89 Max. :88.00
Marks_Projectwork Marks_BOCA Placement Salary
Min. :50.00 Min. :50.00 Length:391 Min. : 0
1st Qu.:64.00 1st Qu.:57.00 Class :character 1st Qu.:172800
Median :69.00 Median :63.00 Mode :character Median :240000
Mean :68.36 Mean :64.38 Mean :219078
3rd Qu.:74.00 3rd Qu.:72.50 3rd Qu.:300000
Max. :87.00 Max. :96.00 Max. :940000
# Median of the Salary column over every row of the data set.
with(DeansDilemmaData, median(Salary))
[1] 240000
# Count the rows per Placement value, then print each count as a percentage
# of the total, rounded to two decimals.
table1 <- xtabs(~ Placement, data = DeansDilemmaData)
round(100 * prop.table(table1), digits = 2)
Placement
Not Placed Placed
20.2 79.8
# Rows whose Placement is "Placed", followed by a per-column summary.
table_Placed <- subset(DeansDilemmaData, Placement == "Placed")
summary(table_Placed)
SlNo Gender Percent_SSC Board_SSC
Min. : 1.00 Length:312 Min. :37.00 Length:312
1st Qu.: 92.75 Class :character 1st Qu.:58.00 Class :character
Median :182.50 Mode :character Median :65.92 Mode :character
Mean :186.45 Mean :65.58
3rd Qu.:276.25 3rd Qu.:74.45
Max. :390.00 Max. :87.20
Board_CBSE Board_ICSE Percent_HSC Board_HSC
Length:312 Length:312 Min. :41.0 Length:312
Class :character Class :character 1st Qu.:55.0 Class :character
Mode :character Mode :character Median :63.0 Mode :character
Mean :64.1
3rd Qu.:72.0
Max. :94.7
Stream_HSC Percent_Degree Course_Degree Degree_Engg
Length:312 Min. :35.50 Length:312 Length:312
Class :character 1st Qu.:57.61 Class :character Class :character
Mode :character Median :63.00 Mode :character Mode :character
Mean :63.22
3rd Qu.:69.00
Max. :89.00
Experience_Yrs Entrance_Test S.TEST Percentile_ET
Min. :0.0 Length:312 Min. :0.0000 Min. : 0.00
1st Qu.:0.0 Class :character 1st Qu.:1.0000 1st Qu.:47.29
Median :0.0 Mode :character Median :1.0000 Median :65.00
Mean :0.5 Mean :0.8365 Mean :56.99
3rd Qu.:1.0 3rd Qu.:1.0000 3rd Qu.:80.00
Max. :3.0 Max. :1.0000 Max. :98.00
S.TEST.SCORE Percent_MBA Specialization_MBA Marks_Communication
Min. : 0.00 Min. :51.24 Length:312 Min. :50.00
1st Qu.:47.29 1st Qu.:57.64 Class :character 1st Qu.:53.00
Median :65.00 Median :61.30 Mode :character Median :58.00
Mean :56.99 Mean :61.90 Mean :60.59
3rd Qu.:80.00 3rd Qu.:66.28 3rd Qu.:67.00
Max. :98.00 Max. :77.89 Max. :88.00
Marks_Projectwork Marks_BOCA Placement Salary
Min. :50.00 Min. :50.00 Length:312 Min. :120000
1st Qu.:64.00 1st Qu.:57.00 Class :character 1st Qu.:220000
Median :69.00 Median :63.00 Mode :character Median :260000
Mean :68.74 Mean :64.71 Mean :274550
3rd Qu.:74.00 3rd Qu.:73.00 3rd Qu.:300000
Max. :87.00 Max. :96.00 Max. :940000
# Median salary among the placed students only.
with(table_Placed, median(Salary))
[1] 260000
# Mean salary of placed students for each Gender value.
with(
  table_Placed,
  aggregate(Salary, by = list(Gender = Gender), FUN = mean)
)
Gender x
1 F 253068.0
2 M 284241.9
# Histogram of Percent_MBA for the placed students.
with(
  table_Placed,
  hist(
    Percent_MBA,
    breaks = 3,
    main = "MBA Performance of Placed Students",
    xlab = "MBA percentage",
    ylab = "Count"
  )
)
# Rows whose Placement is "Not Placed", followed by a per-column summary.
Table_notplaced <- subset(DeansDilemmaData, Placement == "Not Placed")
summary(Table_notplaced)
SlNo Gender Percent_SSC Board_SSC
Min. : 11.0 Length:79 Min. :37.30 Length:79
1st Qu.:136.0 Class :character 1st Qu.:54.03 Class :character
Median :266.0 Mode :character Median :59.20 Mode :character
Mean :233.7 Mean :60.99
3rd Qu.:327.0 3rd Qu.:68.00
Max. :391.0 Max. :85.80
Board_CBSE Board_ICSE Percent_HSC Board_HSC
Length:79 Length:79 Min. :40.00 Length:79
Class :character Class :character 1st Qu.:52.10 Class :character
Mode :character Mode :character Median :62.00 Mode :character
Mean :62.61
3rd Qu.:71.92
Max. :90.00
Stream_HSC Percent_Degree Course_Degree Degree_Engg
Length:79 Min. :35.00 Length:79 Length:79
Class :character 1st Qu.:58.00 Class :character Class :character
Mode :character Median :63.00 Mode :character Mode :character
Mean :62.06
3rd Qu.:67.40
Max. :85.00
Experience_Yrs Entrance_Test S.TEST Percentile_ET
Min. :0.0000 Length:79 Min. :0.0000 Min. : 0.00
1st Qu.:0.0000 Class :character 1st Qu.:1.0000 1st Qu.:21.48
Median :0.0000 Mode :character Median :1.0000 Median :56.15
Mean :0.3924 Mean :0.7975 Mean :46.83
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:71.10
Max. :2.0000 Max. :1.0000 Max. :98.69
S.TEST.SCORE Percent_MBA Specialization_MBA Marks_Communication
Min. : 0.00 Min. :50.83 Length:79 Min. :50.00
1st Qu.:21.48 1st Qu.:56.10 Class :character 1st Qu.:54.00
Median :56.15 Median :60.29 Mode :character Median :59.00
Mean :46.83 Mean :60.74 Mean :60.35
3rd Qu.:71.10 3rd Qu.:65.04 3rd Qu.:66.50
Max. :98.69 Max. :75.71 Max. :79.00
Marks_Projectwork Marks_BOCA Placement Salary
Min. :50.00 Min. :50.00 Length:79 Min. :0
1st Qu.:62.50 1st Qu.:56.50 Class :character 1st Qu.:0
Median :66.00 Median :63.00 Mode :character Median :0
Mean :66.86 Mean :63.06 Mean :0
3rd Qu.:73.00 3rd Qu.:68.50 3rd Qu.:0
Max. :79.00 Max. :82.00 Max. :0
# Draw the Percent_MBA histograms for placed and not-placed students side by
# side. par() returns the previous settings; they are restored afterwards so
# that later plots on the same device are not squeezed into the 1 x 2 layout.
old_par <- par(mfrow = c(1, 2))
hist(table_Placed$Percent_MBA, xlab = "MBA Percentage", ylab = "Count",
     main = "MBA performance- placed students", breaks = 3)
hist(Table_notplaced$Percent_MBA, xlab = "MBA Percentage", ylab = "Count",
     main = "MBA performance- not placed students", breaks = 3)
par(old_par)
# Recode Gender ("M"/"F") as a factor labelled "Male"/"Female", then draw a
# horizontal boxplot of placed students' salaries for each gender.
placed_gender.f <- factor(
  table_Placed$Gender,
  levels = c("M", "F"),
  labels = c("Male", "Female")
)
boxplot(
  table_Placed$Salary ~ placed_gender.f,
  main = "Comparison of Salaries of Males and Females",
  xlab = "Salary",
  ylab = "Gender",
  horizontal = TRUE
)
# Placed students whose S.TEST value is 1, followed by a per-column summary.
# S.TEST is a numeric 0/1 column, so it is compared with the number 1 rather
# than the string "1" (which relied on implicit number-to-string coercion).
placedET <- subset(table_Placed, S.TEST == 1)
summary(placedET)
SlNo Gender Percent_SSC Board_SSC
Min. : 1.0 Length:261 Min. :37.00 Length:261
1st Qu.: 95.0 Class :character 1st Qu.:58.00 Class :character
Median :183.0 Mode :character Median :66.00 Mode :character
Mean :188.8 Mean :65.80
3rd Qu.:276.0 3rd Qu.:74.44
Max. :390.0 Max. :87.20
Board_CBSE Board_ICSE Percent_HSC Board_HSC
Length:261 Length:261 Min. :41.00 Length:261
Class :character Class :character 1st Qu.:55.00 Class :character
Mode :character Mode :character Median :63.00 Mode :character
Mean :64.03
3rd Qu.:72.00
Max. :94.70
Stream_HSC Percent_Degree Course_Degree Degree_Engg
Length:261 Min. :38.00 Length:261 Length:261
Class :character 1st Qu.:57.80 Class :character Class :character
Mode :character Median :63.00 Mode :character Mode :character
Mean :63.33
3rd Qu.:69.30
Max. :89.00
Experience_Yrs Entrance_Test S.TEST Percentile_ET
Min. :0.0000 Length:261 Min. :1 Min. : 0.00
1st Qu.:0.0000 Class :character 1st Qu.:1 1st Qu.:58.00
Median :0.0000 Mode :character Median :1 Median :68.71
Mean :0.4904 Mean :1 Mean :68.12
3rd Qu.:1.0000 3rd Qu.:1 3rd Qu.:83.00
Max. :3.0000 Max. :1 Max. :98.00
S.TEST.SCORE Percent_MBA Specialization_MBA Marks_Communication
Min. : 0.00 Min. :51.24 Length:261 Min. :50.00
1st Qu.:58.00 1st Qu.:57.90 Class :character 1st Qu.:53.00
Median :68.71 Median :61.63 Mode :character Median :59.00
Mean :68.12 Mean :62.09 Mean :60.87
3rd Qu.:83.00 3rd Qu.:66.22 3rd Qu.:67.00
Max. :98.00 Max. :77.89 Max. :88.00
Marks_Projectwork Marks_BOCA Placement Salary
Min. :50.00 Min. :50.00 Length:261 Min. :120000
1st Qu.:64.00 1st Qu.:57.00 Class :character 1st Qu.:220000
Median :70.00 Median :63.00 Mode :character Median :260000
Mean :69.26 Mean :64.93 Mean :274860
3rd Qu.:74.00 3rd Qu.:73.00 3rd Qu.:300000
Max. :87.00 Max. :96.00 Max. :940000
# Pairwise scatter plots of Salary, Percent_MBA and Percentile_ET for the
# rows in placedET.
library(car)
scatterplotMatrix(
  ~ Salary + Percent_MBA + Percentile_ET,
  data = placedET,
  main = "Scatter Plot Matrix"
)
# Average salary of the placed students.
with(table_Placed, mean(Salary))
[1] 274550
# Shapiro-Wilk test of the null hypothesis that the salaries of placed
# students are normally distributed.
shapiro.test(table_Placed$Salary)
Shapiro-Wilk normality test
data: table_Placed$Salary
W = 0.84168, p-value < 2.2e-16
# p-value < 0.05; hence we reject the null hypothesis that the salary data are normally distributed
# Mean salary of placed students for each Gender value (the two group means
# compared by the tests that follow).
with(
  table_Placed,
  aggregate(Salary, by = list(Gender = Gender), FUN = mean)
)
Gender x
1 F 253068.0
2 M 284241.9
# Plot the mean salary of placed students for each gender using
# gplots::plotmeans(); mean.labels = TRUE writes the mean values on the plot.
library(gplots)
plotmeans(table_Placed$Salary~table_Placed$Gender,mean.labels =TRUE)
# Sample variance of placed students' salaries for each Gender value.
with(
  table_Placed,
  aggregate(Salary, by = list(Gender = Gender), FUN = var)
)
Gender x
1 F 5504236572
2 M 9886408520
# F test of the null hypothesis that the salary variance of placed students
# is the same for both genders.
var.test(table_Placed$Salary~table_Placed$Gender)
F test to compare two variances
data: table_Placed$Salary by table_Placed$Gender
F = 0.55675, num df = 96, denom df = 214, p-value = 0.00135
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.3999212 0.7927360
sample estimates:
ratio of variances
0.5567478
# We reject the null hypothesis; variances are statistically significantly different
# Two-sample t-test of mean salary between genders among placed students.
# var.equal = FALSE is the default and selects Welch's test, which does not
# assume equal variances.
t.test(
  Salary ~ Gender,
  data = table_Placed,
  var.equal = FALSE
)
Welch Two Sample t-test
data: Salary by Gender
t = -3.0757, df = 243.03, p-value = 0.00234
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-51138.42 -11209.22
sample estimates:
mean in group F mean in group M
253068.0 284241.9
# We reject the null hypothesis that the means are equal
# Mean salary of placed students for each Degree_Engg value. The grouping
# column is named after the variable actually used for grouping (it was
# previously labelled "Gender").
aggregate(table_Placed$Salary, by = list(Degree_Engg = table_Placed$Degree_Engg), FUN = mean)
Degree_Engg x
1 No 269161.7
2 Yes 325200.0
# Plot the mean salary of placed students for each Degree_Engg value;
# mean.labels = TRUE writes the mean values on the plot.
# gplots was already attached earlier in the file, so this library() call is
# a harmless repeat.
library(gplots)
plotmeans(table_Placed$Salary~table_Placed$Degree_Engg, mean.labels =TRUE)
# Sample variance of placed students' salaries for each Degree_Engg value.
# The grouping column is named after the variable actually used for grouping
# (it was previously labelled "Gender").
aggregate(table_Placed$Salary, by = list(Degree_Engg = table_Placed$Degree_Engg), FUN = var)
Degree_Engg x
1 No 7996660450
2 Yes 12994648276
# F test of the null hypothesis that the salary variance of placed students
# is the same in both Degree_Engg groups.
var.test(table_Placed$Salary~table_Placed$Degree_Engg)
F test to compare two variances
data: table_Placed$Salary by table_Placed$Degree_Engg
F = 0.61538, num df = 281, denom df = 29, p-value = 0.05124
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.3341346 1.0025622
sample estimates:
ratio of variances
0.6153811
# p-value (0.05124) > 0.05; at the 5% significance level we fail to reject the null hypothesis that the variances are equal
# Two-sample t-test of mean salary between the Degree_Engg groups among
# placed students. var.equal = FALSE is the default and selects Welch's test,
# which does not assume equal variances.
t.test(
  Salary ~ Degree_Engg,
  data = table_Placed,
  var.equal = FALSE
)
Welch Two Sample t-test
data: Salary by Degree_Engg
t = -2.6085, df = 32.907, p-value = 0.01357
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-99750.14 -12326.45
sample estimates:
mean in group No mean in group Yes
269161.7 325200.0
# We reject the null hypothesis that the means are equal