# Load the Dean's Dilemma data set and take a first look at it.
# NOTE: the working directory below is machine-specific; point it at the
# folder that holds the CSV before running this script elsewhere.
setwd("C:/Users/Leo Tolstoy/Downloads")
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 too, so
# summary() reports per-level counts (e.g. F/M) rather than "character".
dean.df <- read.csv("Data - Deans Dilemma.csv", stringsAsFactors = TRUE)
# View() opens a spreadsheet viewer; only do that in an interactive session so
# the script still runs under Rscript or knitr.
if (interactive()) {
  View(dean.df)
}
summary(dean.df)
##       SlNo       Gender     Gender.B       Percent_SSC     Board_SSC  
##  Min.   :  1.0   F:127   Min.   :0.0000   Min.   :37.00   CBSE  :113  
##  1st Qu.: 98.5   M:264   1st Qu.:0.0000   1st Qu.:56.00   ICSE  : 77  
##  Median :196.0           Median :0.0000   Median :64.50   Others:201  
##  Mean   :196.0           Mean   :0.3248   Mean   :64.65               
##  3rd Qu.:293.5           3rd Qu.:1.0000   3rd Qu.:74.00               
##  Max.   :391.0           Max.   :1.0000   Max.   :87.20               
##                                                                       
##    Board_CBSE      Board_ICSE      Percent_HSC    Board_HSC  
##  Min.   :0.000   Min.   :0.0000   Min.   :40.0   CBSE  : 96  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:54.0   ISC   : 48  
##  Median :0.000   Median :0.0000   Median :63.0   Others:247  
##  Mean   :0.289   Mean   :0.1969   Mean   :63.8               
##  3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:72.0               
##  Max.   :1.000   Max.   :1.0000   Max.   :94.7               
##                                                              
##     Stream_HSC  Percent_Degree                Course_Degree
##  Arts    : 18   Min.   :35.00   Arts                 : 13  
##  Commerce:222   1st Qu.:57.52   Commerce             :117  
##  Science :151   Median :63.00   Computer Applications: 32  
##                 Mean   :62.98   Engineering          : 37  
##                 3rd Qu.:69.00   Management           :163  
##                 Max.   :89.00   Others               :  5  
##                                 Science              : 24  
##   Degree_Engg      Experience_Yrs   Entrance_Test     S.TEST      
##  Min.   :0.00000   Min.   :0.0000   MAT    :265   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   None   : 67   1st Qu.:1.0000  
##  Median :0.00000   Median :0.0000   K-MAT  : 24   Median :1.0000  
##  Mean   :0.09463   Mean   :0.4783   CAT    : 22   Mean   :0.8286  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   PGCET  :  8   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :3.0000   GCET   :  2   Max.   :1.0000  
##                                     (Other):  3                   
##  Percentile_ET    S.TEST.SCORE    Percent_MBA   
##  Min.   : 0.00   Min.   : 0.00   Min.   :50.83  
##  1st Qu.:41.19   1st Qu.:41.19   1st Qu.:57.20  
##  Median :62.00   Median :62.00   Median :61.01  
##  Mean   :54.93   Mean   :54.93   Mean   :61.67  
##  3rd Qu.:78.00   3rd Qu.:78.00   3rd Qu.:66.02  
##  Max.   :98.69   Max.   :98.69   Max.   :77.89  
##                                                 
##            Specialization_MBA Marks_Communication Marks_Projectwork
##  Marketing & Finance:222      Min.   :50.00       Min.   :50.00    
##  Marketing & HR     :156      1st Qu.:53.00       1st Qu.:64.00    
##  Marketing & IB     : 13      Median :58.00       Median :69.00    
##                               Mean   :60.54       Mean   :68.36    
##                               3rd Qu.:67.00       3rd Qu.:74.00    
##                               Max.   :88.00       Max.   :87.00    
##                                                                    
##    Marks_BOCA         Placement    Placement_B        Salary      
##  Min.   :50.00   Not Placed: 79   Min.   :0.000   Min.   :     0  
##  1st Qu.:57.00   Placed    :312   1st Qu.:1.000   1st Qu.:172800  
##  Median :63.00                    Median :1.000   Median :240000  
##  Mean   :64.38                    Mean   :0.798   Mean   :219078  
##  3rd Qu.:72.50                    3rd Qu.:1.000   3rd Qu.:300000  
##  Max.   :96.00                    Max.   :1.000   Max.   :940000  
## 
library(psych)
# Per-column descriptive statistics via psych::describe().
psych::describe(dean.df)
##                     vars   n      mean        sd    median   trimmed
## SlNo                   1 391    196.00    113.02    196.00    196.00
## Gender*                2 391      1.68      0.47      2.00      1.72
## Gender.B               3 391      0.32      0.47      0.00      0.28
## Percent_SSC            4 391     64.65     10.96     64.50     64.76
## Board_SSC*             5 391      2.23      0.87      3.00      2.28
## Board_CBSE             6 391      0.29      0.45      0.00      0.24
## Board_ICSE             7 391      0.20      0.40      0.00      0.12
## Percent_HSC            8 391     63.80     11.42     63.00     63.34
## Board_HSC*             9 391      2.39      0.85      3.00      2.48
## Stream_HSC*           10 391      2.34      0.56      2.00      2.36
## Percent_Degree        11 391     62.98      8.92     63.00     62.91
## Course_Degree*        12 391      3.85      1.61      4.00      3.81
## Degree_Engg           13 391      0.09      0.29      0.00      0.00
## Experience_Yrs        14 391      0.48      0.67      0.00      0.36
## Entrance_Test*        15 391      5.85      1.35      6.00      6.08
## S.TEST                16 391      0.83      0.38      1.00      0.91
## Percentile_ET         17 391     54.93     31.17     62.00     56.87
## S.TEST.SCORE          18 391     54.93     31.17     62.00     56.87
## Percent_MBA           19 391     61.67      5.85     61.01     61.45
## Specialization_MBA*   20 391      1.47      0.56      1.00      1.42
## Marks_Communication   21 391     60.54      8.82     58.00     59.68
## Marks_Projectwork     22 391     68.36      7.15     69.00     68.60
## Marks_BOCA            23 391     64.38      9.58     63.00     64.08
## Placement*            24 391      1.80      0.40      2.00      1.87
## Placement_B           25 391      0.80      0.40      1.00      0.87
## Salary                26 391 219078.26 138311.65 240000.00 217011.50
##                          mad   min       max     range  skew kurtosis
## SlNo                  145.29  1.00    391.00    390.00  0.00    -1.21
## Gender*                 0.00  1.00      2.00      1.00 -0.75    -1.45
## Gender.B                0.00  0.00      1.00      1.00  0.75    -1.45
## Percent_SSC            12.60 37.00     87.20     50.20 -0.06    -0.72
## Board_SSC*              0.00  1.00      3.00      2.00 -0.45    -1.53
## Board_CBSE              0.00  0.00      1.00      1.00  0.93    -1.14
## Board_ICSE              0.00  0.00      1.00      1.00  1.52     0.31
## Percent_HSC            13.34 40.00     94.70     54.70  0.29    -0.67
## Board_HSC*              0.00  1.00      3.00      2.00 -0.83    -1.13
## Stream_HSC*             0.00  1.00      3.00      2.00 -0.12    -0.72
## Percent_Degree          8.90 35.00     89.00     54.00  0.05     0.24
## Course_Degree*          1.48  1.00      7.00      6.00  0.00    -1.08
## Degree_Engg             0.00  0.00      1.00      1.00  2.76     5.63
## Experience_Yrs          0.00  0.00      3.00      3.00  1.27     1.17
## Entrance_Test*          0.00  1.00      9.00      8.00 -2.52     7.04
## S.TEST                  0.00  0.00      1.00      1.00 -1.74     1.02
## Percentile_ET          25.20  0.00     98.69     98.69 -0.74    -0.69
## S.TEST.SCORE           25.20  0.00     98.69     98.69 -0.74    -0.69
## Percent_MBA             6.39 50.83     77.89     27.06  0.34    -0.52
## Specialization_MBA*     0.00  1.00      3.00      2.00  0.70    -0.56
## Marks_Communication     8.90 50.00     88.00     38.00  0.74    -0.25
## Marks_Projectwork       7.41 50.00     87.00     37.00 -0.26    -0.27
## Marks_BOCA             11.86 50.00     96.00     46.00  0.29    -0.85
## Placement*              0.00  1.00      2.00      1.00 -1.48     0.19
## Placement_B             0.00  0.00      1.00      1.00 -1.48     0.19
## Salary              88956.00  0.00 940000.00 940000.00  0.24     1.74
##                          se
## SlNo                   5.72
## Gender*                0.02
## Gender.B               0.02
## Percent_SSC            0.55
## Board_SSC*             0.04
## Board_CBSE             0.02
## Board_ICSE             0.02
## Percent_HSC            0.58
## Board_HSC*             0.04
## Stream_HSC*            0.03
## Percent_Degree         0.45
## Course_Degree*         0.08
## Degree_Engg            0.01
## Experience_Yrs         0.03
## Entrance_Test*         0.07
## S.TEST                 0.02
## Percentile_ET          1.58
## S.TEST.SCORE           1.58
## Percent_MBA            0.30
## Specialization_MBA*    0.03
## Marks_Communication    0.45
## Marks_Projectwork      0.36
## Marks_BOCA             0.48
## Placement*             0.02
## Placement_B            0.02
## Salary              6994.72

# TASK 3a: Use R to calculate the median salary of all the students in the data sample.

# Median of the Salary column across every row of dean.df.
median(dean.df[["Salary"]])
## [1] 240000

# TASK 3b: Use R to calculate the percentage of students who were placed, correct to 2 decimal places.

# Share of rows whose Placement is "Placed", as a percentage rounded to two
# decimals. Rows with a missing Placement are not counted as placed.
round(
  (sum(dean.df$Placement == "Placed", na.rm = TRUE) / nrow(dean.df)) * 100,
  2
)
## [1] 79.8

# TASK 3c: Use R to create a dataframe called placed that contains only those students who were successfully placed.

# Keep only the rows whose Placement equals "Placed"; rows with a missing
# Placement are excluded.
placed <- dean.df[
  !is.na(dean.df$Placement) & dean.df$Placement == "Placed", ,
  drop = FALSE
]

# TASK 3d: Use R to find the median salary of students who were placed.

# Median of the Salary column within the placed subset.
median(placed[["Salary"]])
## [1] 260000

# TASK 3e: Use R to create a table showing the mean salary of males and females who were placed.

# Mean Salary per Gender level among placed students. The grouping list is
# unnamed, so the result columns are called Group.1 and x.
sub1 <- with(
  placed,
  aggregate(x = Salary, by = list(Gender), FUN = mean)
)
sub1
##   Group.1        x
## 1       F 253068.0
## 2       M 284241.9

# TASK 3f: Use R to generate the following histogram showing a breakdown of the MBA performance of the students who were placed.

# Histogram of Percent_MBA for placed students, using 10-point bins with
# break points at 50, 60, 70 and 80.
hist(
  placed$Percent_MBA,
  breaks = seq(50, 80, by = 10),
  main = "MBA Performance of Placed Students",
  xlab = "MBA Percentage",
  ylab = "Number of placed students",
  col = "blue"
)

# TASK 3g: Create a dataframe called notplaced that contains only those students who were NOT placed after their MBA.

# Keep only the rows whose Placement equals "Not Placed"; rows with a missing
# Placement are excluded.
notplaced <- dean.df[
  !is.na(dean.df$Placement) & dean.df$Placement == "Not Placed", ,
  drop = FALSE
]

# TASK 3h: Draw two histograms side by side, visually comparing the MBA performance of Placed and Not Placed students.

# Draw the Percent_MBA histograms for placed and not-placed students next to
# each other (one row, two columns).
# par() returns the previous settings; they are restored afterwards so the
# two-panel layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(1, 2))
hist(
  placed$Percent_MBA,
  breaks = c(50, 60, 70, 80),
  main = "MBA Performance of Placed Students",
  xlab = "MBA Percentage",
  ylab = "Number of placed students",
  col = "Red"
)
hist(
  notplaced$Percent_MBA,
  breaks = c(50, 60, 70, 80),
  main = "MBA Performance of Not Placed Students",
  xlab = "MBA Percentage",
  ylab = "Number of Not placed students",
  col = "Blue"
)
par(old_par)

# TASK 3i: Use R to draw two boxplots, one below the other, comparing the distribution of salaries of males and females who were placed.

# Horizontal boxplots of Salary for each Gender level among placed students;
# horizontal = TRUE stacks the boxes one below the other.
boxplot(
  placed$Salary ~ placed$Gender,
  horizontal = TRUE,
  main = "comparison of salaries by gender",
  xlab = "Salary",
  ylab = "gender",
  col = c("Misty Rose", "Powder Blue")
)

# TASK 3j: Create a dataframe called placedET, representing students who were placed after the MBA and who also took an MBA entrance test before admission into the MBA program.

# Placed students whose Entrance_Test is anything other than "None"; rows
# with a missing Entrance_Test are excluded.
placedET <- data.frame(
  placed[
    !is.na(placed$Entrance_Test) & placed$Entrance_Test != "None", ,
    drop = FALSE
  ]
)

# TASK 3k: Draw a scatter plot matrix for 3 variables – {Salary, Percent_MBA, Percentile_ET} – using the dataframe placedET.

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatter plot matrix (car package) of Salary, Percent_MBA and Percentile_ET
# for the placedET subset.
scatterplotMatrix(
  ~ Salary + Percent_MBA + Percentile_ET,
  data = placedET,
  main = "ScatterPlotMatrix"
)