This is my case study on the Dean's Dilemma project, which uses R to investigate the following questions.

Read the data set into RStudio

# Load the Dean's Dilemma data set from the working directory.
# The file name is passed directly: wrapping a single string in
# paste(..., sep = "") was a no-op.
# stringsAsFactors = TRUE keeps text columns (Gender, Placement, ...) as
# factors on R >= 4.0 as well, so summary() reports per-level counts.
deansdilemma.df <- read.csv("Data - Deans Dilemma.csv",
                            stringsAsFactors = TRUE)
# Open the loaded data in the data viewer.
View(deansdilemma.df)

Summarize the dataset

# Per-column summary of the whole data set.
summary(object = deansdilemma.df)
##       SlNo       Gender     Gender.B       Percent_SSC     Board_SSC  
##  Min.   :  1.0   F:127   Min.   :0.0000   Min.   :37.00   CBSE  :113  
##  1st Qu.: 98.5   M:264   1st Qu.:0.0000   1st Qu.:56.00   ICSE  : 77  
##  Median :196.0           Median :0.0000   Median :64.50   Others:201  
##  Mean   :196.0           Mean   :0.3248   Mean   :64.65               
##  3rd Qu.:293.5           3rd Qu.:1.0000   3rd Qu.:74.00               
##  Max.   :391.0           Max.   :1.0000   Max.   :87.20               
##                                                                       
##    Board_CBSE      Board_ICSE      Percent_HSC    Board_HSC  
##  Min.   :0.000   Min.   :0.0000   Min.   :40.0   CBSE  : 96  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:54.0   ISC   : 48  
##  Median :0.000   Median :0.0000   Median :63.0   Others:247  
##  Mean   :0.289   Mean   :0.1969   Mean   :63.8               
##  3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:72.0               
##  Max.   :1.000   Max.   :1.0000   Max.   :94.7               
##                                                              
##     Stream_HSC  Percent_Degree                Course_Degree
##  Arts    : 18   Min.   :35.00   Arts                 : 13  
##  Commerce:222   1st Qu.:57.52   Commerce             :117  
##  Science :151   Median :63.00   Computer Applications: 32  
##                 Mean   :62.98   Engineering          : 37  
##                 3rd Qu.:69.00   Management           :163  
##                 Max.   :89.00   Others               :  5  
##                                 Science              : 24  
##   Degree_Engg      Experience_Yrs   Entrance_Test     S.TEST      
##  Min.   :0.00000   Min.   :0.0000   MAT    :265   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   None   : 67   1st Qu.:1.0000  
##  Median :0.00000   Median :0.0000   K-MAT  : 24   Median :1.0000  
##  Mean   :0.09463   Mean   :0.4783   CAT    : 22   Mean   :0.8286  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   PGCET  :  8   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :3.0000   GCET   :  2   Max.   :1.0000  
##                                     (Other):  3                   
##  Percentile_ET    S.TEST.SCORE    Percent_MBA   
##  Min.   : 0.00   Min.   : 0.00   Min.   :50.83  
##  1st Qu.:41.19   1st Qu.:41.19   1st Qu.:57.20  
##  Median :62.00   Median :62.00   Median :61.01  
##  Mean   :54.93   Mean   :54.93   Mean   :61.67  
##  3rd Qu.:78.00   3rd Qu.:78.00   3rd Qu.:66.02  
##  Max.   :98.69   Max.   :98.69   Max.   :77.89  
##                                                 
##            Specialization_MBA Marks_Communication Marks_Projectwork
##  Marketing & Finance:222      Min.   :50.00       Min.   :50.00    
##  Marketing & HR     :156      1st Qu.:53.00       1st Qu.:64.00    
##  Marketing & IB     : 13      Median :58.00       Median :69.00    
##                               Mean   :60.54       Mean   :68.36    
##                               3rd Qu.:67.00       3rd Qu.:74.00    
##                               Max.   :88.00       Max.   :87.00    
##                                                                    
##    Marks_BOCA         Placement    Placement_B        Salary      
##  Min.   :50.00   Not Placed: 79   Min.   :0.000   Min.   :     0  
##  1st Qu.:57.00   Placed    :312   1st Qu.:1.000   1st Qu.:172800  
##  Median :63.00                    Median :1.000   Median :240000  
##  Mean   :64.38                    Mean   :0.798   Mean   :219078  
##  3rd Qu.:72.50                    3rd Qu.:1.000   3rd Qu.:300000  
##  Max.   :96.00                    Max.   :1.000   Max.   :940000  
## 
# psych provides describe() for extended descriptive statistics.
library(psych)
# Descriptive statistics for Percent_SSC.
describe(deansdilemma.df[["Percent_SSC"]])
##    vars   n  mean    sd median trimmed  mad min  max range  skew kurtosis
## X1    1 391 64.65 10.96   64.5   64.76 12.6  37 87.2  50.2 -0.06    -0.72
##      se
## X1 0.55
# Descriptive statistics for Percent_HSC.
describe(deansdilemma.df[["Percent_HSC"]])
##    vars   n mean    sd median trimmed   mad min  max range skew kurtosis
## X1    1 391 63.8 11.42     63   63.34 13.34  40 94.7  54.7 0.29    -0.67
##      se
## X1 0.58
# Descriptive statistics for Percent_Degree.
describe(deansdilemma.df[["Percent_Degree"]])
##    vars   n  mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 391 62.98 8.92     63   62.91 8.9  35  89    54 0.05     0.24 0.45
# Descriptive statistics for Percentile_ET.
describe(deansdilemma.df[["Percentile_ET"]])
##    vars   n  mean    sd median trimmed  mad min   max range  skew kurtosis
## X1    1 391 54.93 31.17     62   56.87 25.2   0 98.69 98.69 -0.74    -0.69
##      se
## X1 1.58
# Descriptive statistics for S.TEST.SCORE.
describe(deansdilemma.df[["S.TEST.SCORE"]])
##    vars   n  mean    sd median trimmed  mad min   max range  skew kurtosis
## X1    1 391 54.93 31.17     62   56.87 25.2   0 98.69 98.69 -0.74    -0.69
##      se
## X1 1.58
# Descriptive statistics for Percent_MBA.
describe(deansdilemma.df[["Percent_MBA"]])
##    vars   n  mean   sd median trimmed  mad   min   max range skew kurtosis
## X1    1 391 61.67 5.85  61.01   61.45 6.39 50.83 77.89 27.06 0.34    -0.52
##     se
## X1 0.3
# Descriptive statistics for Marks_Communication.
describe(deansdilemma.df[["Marks_Communication"]])
##    vars   n  mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 391 60.54 8.82     58   59.68 8.9  50  88    38 0.74    -0.25 0.45
# Descriptive statistics for Marks_Projectwork.
describe(deansdilemma.df[["Marks_Projectwork"]])
##    vars   n  mean   sd median trimmed  mad min max range  skew kurtosis
## X1    1 391 68.36 7.15     69    68.6 7.41  50  87    37 -0.26    -0.27
##      se
## X1 0.36
# Descriptive statistics for Marks_BOCA.
describe(deansdilemma.df[["Marks_BOCA"]])
##    vars   n  mean   sd median trimmed   mad min max range skew kurtosis
## X1    1 391 64.38 9.58     63   64.08 11.86  50  96    46 0.29    -0.85
##      se
## X1 0.48
# Descriptive statistics for Salary.
describe(deansdilemma.df[["Salary"]])
##    vars   n     mean       sd median  trimmed   mad min    max  range skew
## X1    1 391 219078.3 138311.6 240000 217011.5 88956   0 940000 940000 0.24
##    kurtosis      se
## X1     1.74 6994.72

Use R to calculate the median salary of all the students in the data sample

# Median salary across every student in the sample.
median(deansdilemma.df[["Salary"]])
## [1] 240000

Use R to calculate the percentage of students who were placed

# Count students by placement status; the dimension is labelled "Placement".
mytable <- table(Placement = deansdilemma.df$Placement)
mytable
## Placement
## Not Placed     Placed 
##         79        312
# Convert the placement counts to percentages of all students.
100 * prop.table(mytable)
## Placement
## Not Placed     Placed 
##    20.2046    79.7954

Use R to create a dataframe called placed, that contains a subset of only those students who were successfully placed.

# Keep only the rows whose placement indicator equals 1.
placed.df <- subset(deansdilemma.df, Placement_B == 1)
View(placed.df)

Use R to find the median salary of students who were placed.

# Median salary among the placed students only.
median(placed.df[["Salary"]])
## [1] 260000

Use R to create a table showing the mean salary of males and females, who were placed.

# Mean salary of placed students, one row per gender.
with(placed.df, aggregate(Salary, by = list(Gender = Gender), FUN = mean))
##   Gender        x
## 1      F 253068.0
## 2      M 284241.9

Use R to generate the following histogram showing a breakdown of the MBA performance of the students who were placed

# Histogram of MBA percentage for placed students (x: 50-80, y: 0-150).
hist(
  placed.df[["Percent_MBA"]],
  breaks = 3,
  col = "lightblue",
  xlim = c(50, 80),
  ylim = c(0, 150),
  main = "MBA Performance of placed students",
  xlab = "MBA Percentage",
  ylab = "Count"
)

Create a dataframe called notplaced, that contains a subset of only those students who were NOT placed after their MBA.

# Keep only the rows whose placement indicator equals 0.
notplaced.df <- subset(deansdilemma.df, Placement_B == 0)
View(notplaced.df)

Draw two histograms side-by-side, visually comparing the MBA performance of Placed and Not Placed students.

# Lay the two histograms out side by side: one row, two columns.
# par() has to be *called*; the earlier `par=(mfrow=c(2,1))` only assigned
# variables named `par` and `mfrow` and left the plot layout untouched.
old_par <- par(mfrow = c(1, 2))

# Left panel: MBA percentage of placed students.
hist(placed.df$Percent_MBA,
     main = "MBA Performance of placed students",
     xlab = "MBA Percentage",
     ylab = "Count",
     xlim = c(50, 80), ylim = c(0, 150),
     breaks = 3,
     col = "lightblue")

# Right panel: MBA percentage of students who were not placed.
hist(notplaced.df$Percent_MBA,
     main = "MBA Performance of not placed students",
     xlab = "MBA Percentage",
     ylab = "Count",
     xlim = c(50, 80),
     breaks = 3,
     col = "lightblue")

# Restore the previous graphics settings so later plots are not squeezed
# into the two-panel layout.
par(old_par)

Use R to draw two boxplots, one below the other, comparing the distribution of salaries of males and females who were placed

# Horizontal boxplots stack the two genders one below the other.
# Fixes the misspelled `horizental` argument, which boxplot() did not
# recognise, so the boxes stayed vertical. With horizontal boxes the salary
# scale runs along the x-axis, so the axis titles are swapped to match.
# names= relabels the two groups (levels sort as F, then M) instead of a
# separate axis() call, which would overprint the default tick labels.
boxplot(Salary ~ Gender, data = placed.df, horizontal = TRUE,
        names = c("Females", "Males"),
        xlab = "Salary", ylab = "Gender",
        main = "Comparison of Salaries of Males and Females")

Create a dataframe called placedET, representing students who were placed after the MBA and who also gave some MBA entrance test before admission into the MBA program.

# Keep rows where both the placement indicator and the entrance-test
# indicator equal 1.
placedET.df <- subset(deansdilemma.df, Placement_B == 1 & S.TEST == 1)
View(placedET.df)

Draw a Scatter Plot Matrix for 3 variables – {Salary, Percent_MBA, Percentile_ET} using the dataframe placedET.

# car provides scatterplotMatrix().
library(car)
# Pairwise scatterplots of Salary, Percent_MBA and Percentile_ET from
# placedET.df.
# NOTE(review): diagonal = "density" looks like the older car interface;
# confirm it behaves as intended with the installed car version.
scatterplotMatrix(
  formula = ~ Salary + Percent_MBA + Percentile_ET,
  data = placedET.df,
  diagonal = "density",
  cex = 0.6
)