This is an R Markdown file submitted for the analysis of the Dean's Dilemma case.
Task 2b - Reading the dataset
# Load the Dean's Dilemma dataset into dean.df. The path is relative, so the
# CSV must sit in the current working directory.
dean.df <- read.csv("Data - Deans Dilemma.csv")
TASK 3a Use R to calculate the median salary of all the students in the data sample
# Median of the Salary column over every row of dean.df.
median(dean.df[["Salary"]])
## [1] 240000
TASK 3b Use R to calculate the percentage of students who were placed, correct to 2 decimal places.
# Percentage of rows whose Placement equals "Placed", rendered as a string
# with exactly two decimals (round first, then pad with nsmall).
with(
  dean.df,
  format(
    round(sum(Placement == "Placed") / length(Placement) * 100, 2),
    nsmall = 2
  )
)
## [1] "79.80"
TASK 3c Use R to create a dataframe called placed, that contains a subset of only those students who were successfully placed.
# Keep only the rows of dean.df where Placement is "Placed".
# The condition is evaluated inside dean.df, so the column is named directly.
placed.df <- subset(dean.df, Placement == "Placed")
TASK 3d Use R to find the median salary of students who were placed.
# Median salary among placed students. Reuses placed.df (the placed-only
# subset built for Task 3c) instead of filtering dean.df a second time.
median(placed.df$Salary)
## [1] 260000
TASK 3e Use R to create a table showing the mean salary of males and females, who were placed.
# Mean salary of placed students, one row per Gender value.
# placed.df already holds the placed-only rows, so it is used for both the
# values and the grouping key rather than re-subsetting dean.df twice.
# The non-formula interface is kept so the value column is still labelled `x`.
aggregate(
  placed.df$Salary,
  by = list(Gender = placed.df$Gender),
  FUN = mean
)
## Gender x
## 1 F 253068.0
## 2 M 284241.9
TASK 3f Use R to generate the following histogram showing a breakup of the MBA performance of the students who were placed
# Histogram of MBA percentage for placed students only.
# Uses placed.df (built for Task 3c) rather than re-filtering dean.df.
hist(
  placed.df$Percent_MBA,
  main = "MBA Performance of Placed Students",
  xlab = "MBA Percentage",
  ylab = "Count"
)
TASK 3g Create a dataframe called notplaced, that contains a subset of only those students who were NOT placed after their MBA.
# Every row of dean.df whose Placement is anything other than "Placed".
# The condition is evaluated inside dean.df, so the column is named directly.
notplaced <- subset(dean.df, Placement != "Placed")
TASK 3h Draw two histograms side-by-side, visually comparing the MBA performance of Placed and Not Placed students, as follows:
# Side-by-side histograms of MBA percentage: placed (left) vs. not placed
# (right). par() returns the settings it replaces, so the previous layout is
# saved here and restored afterwards instead of assuming it was 1 x 1.
old_par <- par(mfrow = c(1, 2))

# Placed students: reuse placed.df rather than re-filtering dean.df.
hist(
  placed.df$Percent_MBA,
  main = "MBA Performance of Placed Students",
  xlab = "MBA Percentage",
  ylab = "Count"
)

# NOTE(review): this filters on the literal label "Not Placed", whereas the
# `notplaced` data frame is built with != "Placed". The two agree only if
# Placement has exactly these two labels -- confirm before swapping one in.
hist(
  subset(dean.df, Placement == "Not Placed")$Percent_MBA,
  main = "MBA Performance of Not Placed Students",
  xlab = "MBA Percentage",
  ylab = "Count"
)

# Put the plotting layout back the way it was.
par(old_par)
TASK 3i Use R to draw two boxplots, one below the other, comparing the distribution of salaries of males and females who were placed, as follows:
# Stacked horizontal boxplots of salary for placed males (top) and placed
# females (bottom). The previous plotting layout is saved and restored rather
# than being reset to a hard-coded 1 x 1 grid.
old_par <- par(mfrow = c(2, 1))

# placed.df already contains only placed students, so just split by Gender.
boxplot(
  subset(placed.df, Gender == "M")$Salary,
  main = "Males",
  col = "yellow",
  horizontal = TRUE,
  xlab = "Salary"
)
boxplot(
  subset(placed.df, Gender == "F")$Salary,
  main = "Females",  # was misspelled "FeMales"
  col = "yellow",
  horizontal = TRUE,
  xlab = "Salary"
)

# Put the plotting layout back the way it was.
par(old_par)
TASK 3j Create a dataframe called placedET, representing students who were placed after the MBA and who also gave some MBA entrance test before admission into the MBA program.
# Placed students whose Entrance_Test value is anything other than "None".
# Both conditions are evaluated inside dean.df, so columns are named directly.
placedET <- subset(
  dean.df,
  Placement == "Placed" & Entrance_Test != "None"
)
TASK 3k Draw a Scatter Plot Matrix for 3 variables – {Salary, Percent_MBA, Percentile_ET} using the dataframe placedET.
library(car)
## Warning: package 'car' was built under R version 3.4.3
# Scatter plot matrix of Salary, Percent_MBA and Percentile_ET for the rows
# of placedET.
# NOTE(review): `spread` and `smoother.args` look like the older car (2.x)
# argument spelling; newer car releases appear to configure the smoother via
# `smooth = list(...)` -- confirm against the installed car version.
scatterplotMatrix(
  placedET[, c("Salary", "Percent_MBA", "Percentile_ET")],
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter Plot Matrix"
)