The answers to questions 2 and 3 are as follows:
2(b)
# Load the survey data. stringsAsFactors = TRUE keeps text columns as factors
# (the default before R 4.0), so summary() reports level counts for them.
dean.df <- read.csv("abcd.csv", stringsAsFactors = TRUE)
# View() opens a viewer window, so only call it in an interactive session.
if (interactive()) {
  View(dean.df)
}
2(c) Summary of the entire table
# Per-column overview of the whole data frame.
summary(object = dean.df)
## SlNo Gender Gender.B Percent_SSC Board_SSC
## Min. : 1.0 F:127 Min. :0.0000 Min. :37.00 CBSE :113
## 1st Qu.: 98.5 M:264 1st Qu.:0.0000 1st Qu.:56.00 ICSE : 77
## Median :196.0 Median :0.0000 Median :64.50 Others:201
## Mean :196.0 Mean :0.3248 Mean :64.65
## 3rd Qu.:293.5 3rd Qu.:1.0000 3rd Qu.:74.00
## Max. :391.0 Max. :1.0000 Max. :87.20
##
## Board_CBSE Board_ICSE Percent_HSC Board_HSC
## Min. :0.000 Min. :0.0000 Min. :40.0 CBSE : 96
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:54.0 ISC : 48
## Median :0.000 Median :0.0000 Median :63.0 Others:247
## Mean :0.289 Mean :0.1969 Mean :63.8
## 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:72.0
## Max. :1.000 Max. :1.0000 Max. :94.7
##
## Stream_HSC Percent_Degree Course_Degree
## Arts : 18 Min. :35.00 Arts : 13
## Commerce:222 1st Qu.:57.52 Commerce :117
## Science :151 Median :63.00 Computer Applications: 32
## Mean :62.98 Engineering : 37
## 3rd Qu.:69.00 Management :163
## Max. :89.00 Others : 5
## Science : 24
## Degree_Engg Experience_Yrs Entrance_Test S.TEST
## Min. :0.00000 Min. :0.0000 MAT :265 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 None : 67 1st Qu.:1.0000
## Median :0.00000 Median :0.0000 K-MAT : 24 Median :1.0000
## Mean :0.09463 Mean :0.4783 CAT : 22 Mean :0.8286
## 3rd Qu.:0.00000 3rd Qu.:1.0000 PGCET : 8 3rd Qu.:1.0000
## Max. :1.00000 Max. :3.0000 GCET : 2 Max. :1.0000
## (Other): 3
## Percentile_ET S.TEST.SCORE Percent_MBA
## Min. : 0.00 Min. : 0.00 Min. :50.83
## 1st Qu.:41.19 1st Qu.:41.19 1st Qu.:57.20
## Median :62.00 Median :62.00 Median :61.01
## Mean :54.93 Mean :54.93 Mean :61.67
## 3rd Qu.:78.00 3rd Qu.:78.00 3rd Qu.:66.02
## Max. :98.69 Max. :98.69 Max. :77.89
##
## Specialization_MBA Marks_Communication Marks_Projectwork
## Marketing & Finance:222 Min. :50.00 Min. :50.00
## Marketing & HR :156 1st Qu.:53.00 1st Qu.:64.00
## Marketing & IB : 13 Median :58.00 Median :69.00
## Mean :60.54 Mean :68.36
## 3rd Qu.:67.00 3rd Qu.:74.00
## Max. :88.00 Max. :87.00
##
## Marks_BOCA Placement Placement_B Salary
## Min. :50.00 Not Placed: 79 Min. :0.000 Min. : 0
## 1st Qu.:57.00 Placed :312 1st Qu.:1.000 1st Qu.:172800
## Median :63.00 Median :1.000 Median :240000
## Mean :64.38 Mean :0.798 Mean :219078
## 3rd Qu.:72.50 3rd Qu.:1.000 3rd Qu.:300000
## Max. :96.00 Max. :1.000 Max. :940000
##
Running describe only on the continuous columns (percentages, marks and salary), because the statistics it reports are not meaningful for the ID, indicator and categorical columns
library(psych)
# Select the percentage, marks and salary columns by name rather than by
# position, so the call keeps working if the CSV's column order changes.
score_cols <- c(
  "Percent_SSC", "Percent_HSC", "Percent_Degree", "Percentile_ET",
  "S.TEST.SCORE", "Percent_MBA", "Marks_Communication", "Marks_Projectwork",
  "Marks_BOCA", "Salary"
)
describe(dean.df[, score_cols])
## vars n mean sd median trimmed
## Percent_SSC 1 391 64.65 10.96 64.50 64.76
## Percent_HSC 2 391 63.80 11.42 63.00 63.34
## Percent_Degree 3 391 62.98 8.92 63.00 62.91
## Percentile_ET 4 391 54.93 31.17 62.00 56.87
## S.TEST.SCORE 5 391 54.93 31.17 62.00 56.87
## Percent_MBA 6 391 61.67 5.85 61.01 61.45
## Marks_Communication 7 391 60.54 8.82 58.00 59.68
## Marks_Projectwork 8 391 68.36 7.15 69.00 68.60
## Marks_BOCA 9 391 64.38 9.58 63.00 64.08
## Salary 10 391 219078.26 138311.65 240000.00 217011.50
## mad min max range skew kurtosis
## Percent_SSC 12.60 37.00 87.20 50.20 -0.06 -0.72
## Percent_HSC 13.34 40.00 94.70 54.70 0.29 -0.67
## Percent_Degree 8.90 35.00 89.00 54.00 0.05 0.24
## Percentile_ET 25.20 0.00 98.69 98.69 -0.74 -0.69
## S.TEST.SCORE 25.20 0.00 98.69 98.69 -0.74 -0.69
## Percent_MBA 6.39 50.83 77.89 27.06 0.34 -0.52
## Marks_Communication 8.90 50.00 88.00 38.00 0.74 -0.25
## Marks_Projectwork 7.41 50.00 87.00 37.00 -0.26 -0.27
## Marks_BOCA 11.86 50.00 96.00 46.00 0.29 -0.85
## Salary 88956.00 0.00 940000.00 940000.00 0.24 1.74
## se
## Percent_SSC 0.55
## Percent_HSC 0.58
## Percent_Degree 0.45
## Percentile_ET 1.58
## S.TEST.SCORE 1.58
## Percent_MBA 0.30
## Marks_Communication 0.45
## Marks_Projectwork 0.36
## Marks_BOCA 0.48
## Salary 6994.72
3(a) Median of salary
# Median of the Salary column over every row of dean.df.
median(dean.df[["Salary"]])
## [1] 240000
3(b) Percentage placed: the value printed under the column labelled 1 is the required percentage
# Share of each Placement_B value, expressed as a percentage of all rows.
abc <- 100 * prop.table(table(dean.df$Placement_B))
abc
##
## 0 1
## 20.2046 79.7954
# Pick the entry for Placement_B == 1 by its level name; a positional index
# would point at the wrong cell (or NA) if only one level were present.
abc["1"]
## 1
## 79.7954
3(c)
# Keep only the rows with Placement_B == 1. which() drops rows where the
# comparison is NA instead of returning all-NA rows.
placed.df <- dean.df[which(dean.df$Placement_B == 1), ]
# View() opens a viewer window, so only call it in an interactive session.
if (interactive()) {
  View(placed.df)
}
3(d)
# Median of the Salary column restricted to the rows in placed.df.
median(placed.df[["Salary"]])
## [1] 260000
3(e)
# Mean of Salary for each Gender level; the grouping column is labelled "sex".
mytable11 <- aggregate(
  x = dean.df$Salary,
  by = list(sex = dean.df$Gender),
  FUN = mean
)
mytable11
## sex x
## 1 F 193288.2
## 2 M 231484.8
3(f)
# Histogram of MBA percentage for the rows in placed.df.
# breaks = 2 is only a suggested number of cells; hist() picks "pretty" breaks.
hist(
  placed.df$Percent_MBA,
  breaks = 2,
  col = "gray",
  xlim = c(50, 80),
  main = "MBA Performance of Placed Students",
  xlab = "MBA Percentage",
  ylab = "Count"
)
3(g)
# Keep only the rows with Placement_B == 0. which() drops rows where the
# comparison is NA instead of returning all-NA rows.
notplaced.df <- dean.df[which(dean.df$Placement_B == 0), ]
# View() opens a viewer window, so only call it in an interactive session.
if (interactive()) {
  View(notplaced.df)
}
3(h)
# Draw the two histograms side by side. par() returns the previous settings,
# which are restored afterwards so later plots are not drawn in this layout.
old_par <- par(mfcol = c(1, 2), cex = 1, cex.main = 0.8)
hist(
  placed.df$Percent_MBA,
  main = "MBA Performance of Placed Students",
  xlab = "MBA Percentage",
  ylab = "Count",
  breaks = 2,
  col = "gray",
  xlim = c(50, 80)
)
hist(
  notplaced.df$Percent_MBA,
  main = "MBA Performance of not placed Students",
  xlab = "MBA Percentage",
  ylab = "Count",
  breaks = 2,
  col = "gray",
  xlim = c(50, 80)
)
par(old_par)
3(i)
# Horizontal box plots of Salary split by Gender for the rows in placed.df.
# The default group axis is suppressed (yaxt = "n") and redrawn below with
# spelled-out labels.
boxplot(
  Salary ~ Gender,
  data = placed.df,
  horizontal = TRUE,
  range = 1.5,
  yaxt = "n",
  main = "Comparison of Salaries of Males and Females",
  xlab = "Salary",
  ylab = "Gender"
)
axis(side = 2, at = 1:2, labels = c("Female", "Male"))
3(j)
# Keep only the rows of placed.df with S.TEST == 1. which() drops rows where
# the comparison is NA instead of returning all-NA rows.
placedET.df <- placed.df[which(placed.df$S.TEST == 1), ]
# View() opens a viewer window, so only call it in an interactive session.
if (interactive()) {
  View(placedET.df)
}
3(k)
# Load car from the default library paths; a hard-coded, user-specific
# lib.loc only resolves on the machine where it was written.
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Pairwise scatterplots of Salary, Percent_MBA and Percentile_ET for the rows
# in placedET.df.
scatterplotMatrix(
  ~ Salary + Percent_MBA + Percentile_ET,
  data = placedET.df,
  cex = 0.6
)
Thanking you
Nihir Gulati