TASK 2b - Reading the dataset

# Load the Dean's Dilemma data from the CSV file (path is relative to the
# working directory) and open it in the data viewer for a quick look.
deanDilemma <- read.csv(file = "Data - Deans Dilemma.csv")
View(deanDilemma)

TASK 2c - Summarize the dataset

# Five-number summary plus mean for the score and salary columns.
# Columns are selected by name rather than by position so the call keeps
# working (and stays readable) if the column order of the CSV changes.
summary(deanDilemma[c(
  "Percent_SSC", "Percent_HSC", "Percent_Degree", "Percentile_ET",
  "S.TEST.SCORE", "Percent_MBA", "Marks_Communication",
  "Marks_Projectwork", "Marks_BOCA", "Salary"
)])
##   Percent_SSC     Percent_HSC   Percent_Degree  Percentile_ET  
##  Min.   :37.00   Min.   :40.0   Min.   :35.00   Min.   : 0.00  
##  1st Qu.:56.00   1st Qu.:54.0   1st Qu.:57.52   1st Qu.:41.19  
##  Median :64.50   Median :63.0   Median :63.00   Median :62.00  
##  Mean   :64.65   Mean   :63.8   Mean   :62.98   Mean   :54.93  
##  3rd Qu.:74.00   3rd Qu.:72.0   3rd Qu.:69.00   3rd Qu.:78.00  
##  Max.   :87.20   Max.   :94.7   Max.   :89.00   Max.   :98.69  
##   S.TEST.SCORE    Percent_MBA    Marks_Communication Marks_Projectwork
##  Min.   : 0.00   Min.   :50.83   Min.   :50.00       Min.   :50.00    
##  1st Qu.:41.19   1st Qu.:57.20   1st Qu.:53.00       1st Qu.:64.00    
##  Median :62.00   Median :61.01   Median :58.00       Median :69.00    
##  Mean   :54.93   Mean   :61.67   Mean   :60.54       Mean   :68.36    
##  3rd Qu.:78.00   3rd Qu.:66.02   3rd Qu.:67.00       3rd Qu.:74.00    
##  Max.   :98.69   Max.   :77.89   Max.   :88.00       Max.   :87.00    
##    Marks_BOCA        Salary      
##  Min.   :50.00   Min.   :     0  
##  1st Qu.:57.00   1st Qu.:172800  
##  Median :63.00   Median :240000  
##  Mean   :64.38   Mean   :219078  
##  3rd Qu.:72.50   3rd Qu.:300000  
##  Max.   :96.00   Max.   :940000
library(psych)
# Descriptive statistics (n, mean, sd, median, trimmed mean, mad, min, max,
# range, skew, kurtosis, se) for the score and salary columns.
# Columns are selected by name rather than by position so the call does not
# silently describe the wrong variables if the CSV layout changes.
describe(deanDilemma[c(
  "Percent_SSC", "Percent_HSC", "Percent_Degree", "Percentile_ET",
  "S.TEST.SCORE", "Percent_MBA", "Marks_Communication",
  "Marks_Projectwork", "Marks_BOCA", "Salary"
)])
##                     vars   n      mean        sd    median   trimmed
## Percent_SSC            1 391     64.65     10.96     64.50     64.76
## Percent_HSC            2 391     63.80     11.42     63.00     63.34
## Percent_Degree         3 391     62.98      8.92     63.00     62.91
## Percentile_ET          4 391     54.93     31.17     62.00     56.87
## S.TEST.SCORE           5 391     54.93     31.17     62.00     56.87
## Percent_MBA            6 391     61.67      5.85     61.01     61.45
## Marks_Communication    7 391     60.54      8.82     58.00     59.68
## Marks_Projectwork      8 391     68.36      7.15     69.00     68.60
## Marks_BOCA             9 391     64.38      9.58     63.00     64.08
## Salary                10 391 219078.26 138311.65 240000.00 217011.50
##                          mad   min       max     range  skew kurtosis
## Percent_SSC            12.60 37.00     87.20     50.20 -0.06    -0.72
## Percent_HSC            13.34 40.00     94.70     54.70  0.29    -0.67
## Percent_Degree          8.90 35.00     89.00     54.00  0.05     0.24
## Percentile_ET          25.20  0.00     98.69     98.69 -0.74    -0.69
## S.TEST.SCORE           25.20  0.00     98.69     98.69 -0.74    -0.69
## Percent_MBA             6.39 50.83     77.89     27.06  0.34    -0.52
## Marks_Communication     8.90 50.00     88.00     38.00  0.74    -0.25
## Marks_Projectwork       7.41 50.00     87.00     37.00 -0.26    -0.27
## Marks_BOCA             11.86 50.00     96.00     46.00  0.29    -0.85
## Salary              88956.00  0.00 940000.00 940000.00  0.24     1.74
##                          se
## Percent_SSC            0.55
## Percent_HSC            0.58
## Percent_Degree         0.45
## Percentile_ET          1.58
## S.TEST.SCORE           1.58
## Percent_MBA            0.30
## Marks_Communication    0.45
## Marks_Projectwork      0.36
## Marks_BOCA             0.48
## Salary              6994.72

TASK 3a median salary of all the students:

# Median of the Salary column over every row of the dataset.
median(deanDilemma[["Salary"]])
## [1] 240000

TASK 3b percentage of students who were placed:

# Count students per Placement_B value, then convert the counts to
# percentages of the whole.
p <- table(deanDilemma$Placement_B)
percentagePlaced <- prop.table(p) * 100
# Look the placed group up by its label ("1") instead of by position: a
# positional [2] returns NA when only one placement category is present.
format(percentagePlaced["1"], digits = 2, nsmall = 2)
##       1 
## "79.80"

TASK 3c Subset of placed students:

# Keep only the rows whose Placement column reads "Placed".
# %in% never yields NA, so rows with a missing Placement are dropped.
placed <- deanDilemma[deanDilemma$Placement %in% "Placed", ]

TASK 3d Median salary of placed students:

# Median of the Salary column within the placed subset.
median(placed[["Salary"]])
## [1] 260000

TASK 3e Mean salary, by gender, of the students who were placed:

# Mean salary of the placed students, computed separately for each Gender
# value. The result has one row per gender (columns Group.1 and x).
with(placed, aggregate(Salary, by = list(Gender), FUN = mean))
##   Group.1        x
## 1       F 253068.0
## 2       M 284241.9

TASK 3f

Performance of placed students in their MBA:

# Histogram of Percent_MBA for the placed students.
# `breaks = 3` is a suggested number of cells, not an exact count.
hist(
  x = placed$Percent_MBA,
  main = " MBA Performance of placed students",
  xlab = "MBA Percentage",
  ylab = "Counts",
  col = "grey",
  breaks = 3
)

TASK 3g Not placed students dataset

# Rows with Placement_B equal to 0, followed by how many there are.
# subset() drops rows where the condition is NA, matching which().
notplaced <- subset(deanDilemma, Placement_B == 0)
nrow(notplaced)
## [1] 79

TASK 3h Visually comparing the MBA performance of placed vs. not placed students:

# Draw the two histograms side by side in a 1 x 2 layout.
# par() returns the settings it replaces; keeping them lets the layout be
# restored to whatever it was before, instead of assuming a single panel.
old_par <- par(mfrow = c(1, 2))

# Left panel: Percent_MBA of the placed students.
hist(placed$Percent_MBA, breaks = 3, xlab = "MBA Percentage", ylab = "Counts",
     main = " MBA Performance of placed students", col = "skyblue")

# Right panel: Percent_MBA of the students who were not placed.
hist(notplaced$Percent_MBA, breaks = 3, xlab = "MBA Percentage",
     ylab = "Counts", main = " MBA Performance of not placed students",
     col = "skyblue")

# Put the previous graphics layout back.
par(old_par)

TASK 3i comparing the distribution of salaries of males and females who were placed:

library(lattice)

# Horizontal box-and-whisker plot of Salary for each Gender among the
# placed students. The title spelling is corrected ("Comparison").
bwplot(
  Gender ~ Salary,
  data = placed,
  horizontal = TRUE,
  main = "Comparison of Salaries of Male and Female",
  col = "black",
  ylab = "Gender"
)

TASK 3j Subset of placed students for whom S.TEST is 1:

# Rows where both Placement_B and S.TEST equal 1.
# subset() excludes rows for which the combined condition is NA.
placedET <- subset(deanDilemma, Placement_B == 1 & S.TEST == 1)

TASK 3k Scatter plot matrix of Salary, Percent_MBA and Percentile_ET for the placedET subset:

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatter plot matrix of Salary, Percent_MBA and Percentile_ET, drawn from
# the rows in placedET with slightly shrunken plotting symbols.
scatterplotMatrix(
  ~ Salary + Percent_MBA + Percentile_ET,
  data = placedET,
  cex = 0.6,
  main = "Scatter Plot Matrix"
)