# Load the Deans Dilemma placement data set and take a first look at it.
#
# The data directory is taken from the DEANS_DATA_DIR environment variable
# when it is set and otherwise falls back to the original location. The file
# is addressed by its full path, so the session's working directory is left
# untouched.
data_dir <- Sys.getenv("DEANS_DATA_DIR", unset = "C:/Users/TANAY/Downloads")

# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 as well;
# the per-level counts in the summary output recorded below depend on that.
deans <- read.csv(
  file.path(data_dir, "Data - Deans Dilemma.csv"),
  stringsAsFactors = TRUE
)

# The spreadsheet viewer needs an interactive session; skip it under Rscript.
if (interactive()) {
  View(deans)
}

summary(deans)
##       SlNo       Gender     Gender.B       Percent_SSC     Board_SSC  
##  Min.   :  1.0   F:127   Min.   :0.0000   Min.   :37.00   CBSE  :113  
##  1st Qu.: 98.5   M:264   1st Qu.:0.0000   1st Qu.:56.00   ICSE  : 77  
##  Median :196.0           Median :0.0000   Median :64.50   Others:201  
##  Mean   :196.0           Mean   :0.3248   Mean   :64.65               
##  3rd Qu.:293.5           3rd Qu.:1.0000   3rd Qu.:74.00               
##  Max.   :391.0           Max.   :1.0000   Max.   :87.20               
##                                                                       
##    Board_CBSE      Board_ICSE      Percent_HSC    Board_HSC  
##  Min.   :0.000   Min.   :0.0000   Min.   :40.0   CBSE  : 96  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:54.0   ISC   : 48  
##  Median :0.000   Median :0.0000   Median :63.0   Others:247  
##  Mean   :0.289   Mean   :0.1969   Mean   :63.8               
##  3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:72.0               
##  Max.   :1.000   Max.   :1.0000   Max.   :94.7               
##                                                              
##     Stream_HSC  Percent_Degree                Course_Degree
##  Arts    : 18   Min.   :35.00   Arts                 : 13  
##  Commerce:222   1st Qu.:57.52   Commerce             :117  
##  Science :151   Median :63.00   Computer Applications: 32  
##                 Mean   :62.98   Engineering          : 37  
##                 3rd Qu.:69.00   Management           :163  
##                 Max.   :89.00   Others               :  5  
##                                 Science              : 24  
##   Degree_Engg      Experience_Yrs   Entrance_Test     S.TEST      
##  Min.   :0.00000   Min.   :0.0000   MAT    :265   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   None   : 67   1st Qu.:1.0000  
##  Median :0.00000   Median :0.0000   K-MAT  : 24   Median :1.0000  
##  Mean   :0.09463   Mean   :0.4783   CAT    : 22   Mean   :0.8286  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   PGCET  :  8   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :3.0000   GCET   :  2   Max.   :1.0000  
##                                     (Other):  3                   
##  Percentile_ET    S.TEST.SCORE    Percent_MBA   
##  Min.   : 0.00   Min.   : 0.00   Min.   :50.83  
##  1st Qu.:41.19   1st Qu.:41.19   1st Qu.:57.20  
##  Median :62.00   Median :62.00   Median :61.01  
##  Mean   :54.93   Mean   :54.93   Mean   :61.67  
##  3rd Qu.:78.00   3rd Qu.:78.00   3rd Qu.:66.02  
##  Max.   :98.69   Max.   :98.69   Max.   :77.89  
##                                                 
##            Specialization_MBA Marks_Communication Marks_Projectwork
##  Marketing & Finance:222      Min.   :50.00       Min.   :50.00    
##  Marketing & HR     :156      1st Qu.:53.00       1st Qu.:64.00    
##  Marketing & IB     : 13      Median :58.00       Median :69.00    
##                               Mean   :60.54       Mean   :68.36    
##                               3rd Qu.:67.00       3rd Qu.:74.00    
##                               Max.   :88.00       Max.   :87.00    
##                                                                    
##    Marks_BOCA         Placement    Placement_B        Salary      
##  Min.   :50.00   Not Placed: 79   Min.   :0.000   Min.   :     0  
##  1st Qu.:57.00   Placed    :312   1st Qu.:1.000   1st Qu.:172800  
##  Median :63.00                    Median :1.000   Median :240000  
##  Mean   :64.38                    Mean   :0.798   Mean   :219078  
##  3rd Qu.:72.50                    3rd Qu.:1.000   3rd Qu.:300000  
##  Max.   :96.00                    Max.   :1.000   Max.   :940000  
## 
# Median salary over every row of the data set. The summary shows a minimum
# Salary of 0, so zero salaries are part of this figure (presumably the
# students who were not placed -- confirm against the data).
median(deans[["Salary"]])
## [1] 240000

# Keep only the rows flagged as placed; which() discards NA comparisons the
# same way subset() does.
is_placed <- deans[["Placement_B"]] == 1
placed <- deans[which(is_placed), , drop = FALSE]

# Share of students that were placed, printed as a percentage.
percent <- nrow(placed) / nrow(deans)
percent * 100
## [1] 79.7954

# Median salary among placed students only.
median(placed[["Salary"]])
## [1] 260000

# Mean salary of placed students, one row per gender.
placed_mean <- aggregate(
  placed$Salary,
  by = list(placed$Gender),
  FUN = mean
)
placed_mean
##   Group.1        x
## 1       F 253068.0
## 2       M 284241.9
# Histogram of MBA percentages for the placed students, drawn on fixed axes
# (x: 50-80, y: 0-150) with roughly three breaks requested.
hist(
  placed[["Percent_MBA"]],
  breaks = 3,
  col = "grey",
  xlim = c(50, 80),
  ylim = c(0, 150),
  main = "MBA Performance of placed students",
  xlab = "MBA Percentage",
  ylab = "Count"
)

# Students who were not placed, for comparison against `placed`.
notplaced <- subset(deans, Placement_B == 0)

# Draw the two MBA-percentage histograms side by side. par() returns the
# previous settings, which are restored afterwards so that the boxplot below
# gets a full page instead of landing in one half of the 1x2 layout.
old_par <- par(mfrow = c(1, 2))
hist(
  placed$Percent_MBA,
  main = "MBA performance of Placed Students",
  xlab = "MBA Percent",
  ylab = "Count",
  xlim = c(50, 80),
  breaks = 3,
  col = "grey"
)
hist(
  notplaced$Percent_MBA,
  main = "MBA performance of Not Placed Students",
  xlab = "MBA Percent",
  ylab = "Count",
  xlim = c(50, 80),
  breaks = 3,
  col = "grey"
)
par(old_par)

# Horizontal boxplots of salary by gender for placed students. The `names`
# labels follow the order of the Gender levels (F, then M).
boxplot(
  Salary ~ Gender,
  data = placed,
  xlab = "Salary",
  ylab = "Gender",
  main = "Comparison of Salaries of Males And Females",
  horizontal = TRUE,
  names = c("Females", "Males")
)

# Placed students with S.TEST == 1 (presumably those who sat an entrance
# test -- confirm against the data dictionary).
placedET <- subset(placed, S.TEST == 1)

# Install car only when it is missing, so the script does not download and
# reinstall the package on every run; fetch it over https when needed.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car", repos = "https://cran.us.r-project.org")
}
library(car)

# Pairwise scatterplots of salary, MBA percentage and entrance-test
# percentile. The formula is passed positionally so that S3 dispatch picks
# the formula method directly.
scatterplotMatrix(
  ~ Salary + Percent_MBA + Percentile_ET,
  data = placedET
)