Read and View the data using read.csv
# Load the placement dataset from a CSV file in the working directory,
# open it in the spreadsheet-style viewer, and print its first six rows.
data.df <- read.csv(file = "Data - Deans Dilemma.csv")
View(data.df)
head(data.df, n = 6L)
## SlNo Gender Gender.B Percent_SSC Board_SSC Board_CBSE Board_ICSE
## 1 1 M 0 62.00 Others 0 0
## 2 2 M 0 76.33 ICSE 0 1
## 3 3 M 0 72.00 Others 0 0
## 4 4 M 0 60.00 CBSE 1 0
## 5 5 M 0 61.00 CBSE 1 0
## 6 6 M 0 55.00 ICSE 0 1
## Percent_HSC Board_HSC Stream_HSC Percent_Degree Course_Degree
## 1 88.00 Others Commerce 52.00 Science
## 2 75.33 Others Science 75.48 Computer Applications
## 3 78.00 Others Commerce 66.63 Engineering
## 4 63.00 CBSE Arts 58.00 Management
## 5 55.00 ISC Science 54.00 Engineering
## 6 64.00 CBSE Commerce 50.00 Commerce
## Degree_Engg Experience_Yrs Entrance_Test S.TEST Percentile_ET
## 1 0 0 MAT 1 55.0
## 2 0 1 MAT 1 86.5
## 3 1 0 None 0 0.0
## 4 0 0 MAT 1 75.0
## 5 1 1 MAT 1 66.0
## 6 0 0 None 0 0.0
## S.TEST.SCORE Percent_MBA Specialization_MBA Marks_Communication
## 1 55.0 58.80 Marketing & HR 50
## 2 86.5 66.28 Marketing & Finance 69
## 3 0.0 52.91 Marketing & Finance 50
## 4 75.0 57.80 Marketing & Finance 54
## 5 66.0 59.43 Marketing & HR 52
## 6 0.0 56.81 Marketing & Finance 53
## Marks_Projectwork Marks_BOCA Placement Placement_B Salary
## 1 65 74 Placed 1 270000
## 2 70 75 Placed 1 200000
## 3 61 59 Placed 1 240000
## 4 66 62 Placed 1 250000
## 5 65 67 Placed 1 180000
## 6 70 53 Placed 1 300000
Create a dataframe called placed containing a subset of only those students who were successfully placed
# Keep only the rows whose placement flag equals 1 (the placed students).
# `%in%` yields FALSE rather than NA for a missing flag, so rows with a
# missing Placement_B are left out of the subset.
placed <- data.df[data.df[["Placement_B"]] %in% 1, ]
head(placed, n = 6L)
## SlNo Gender Gender.B Percent_SSC Board_SSC Board_CBSE Board_ICSE
## 1 1 M 0 62.00 Others 0 0
## 2 2 M 0 76.33 ICSE 0 1
## 3 3 M 0 72.00 Others 0 0
## 4 4 M 0 60.00 CBSE 1 0
## 5 5 M 0 61.00 CBSE 1 0
## 6 6 M 0 55.00 ICSE 0 1
## Percent_HSC Board_HSC Stream_HSC Percent_Degree Course_Degree
## 1 88.00 Others Commerce 52.00 Science
## 2 75.33 Others Science 75.48 Computer Applications
## 3 78.00 Others Commerce 66.63 Engineering
## 4 63.00 CBSE Arts 58.00 Management
## 5 55.00 ISC Science 54.00 Engineering
## 6 64.00 CBSE Commerce 50.00 Commerce
## Degree_Engg Experience_Yrs Entrance_Test S.TEST Percentile_ET
## 1 0 0 MAT 1 55.0
## 2 0 1 MAT 1 86.5
## 3 1 0 None 0 0.0
## 4 0 0 MAT 1 75.0
## 5 1 1 MAT 1 66.0
## 6 0 0 None 0 0.0
## S.TEST.SCORE Percent_MBA Specialization_MBA Marks_Communication
## 1 55.0 58.80 Marketing & HR 50
## 2 86.5 66.28 Marketing & Finance 69
## 3 0.0 52.91 Marketing & Finance 50
## 4 75.0 57.80 Marketing & Finance 54
## 5 66.0 59.43 Marketing & HR 52
## 6 0.0 56.81 Marketing & Finance 53
## Marks_Projectwork Marks_BOCA Placement Placement_B Salary
## 1 65 74 Placed 1 270000
## 2 70 75 Placed 1 200000
## 3 61 59 Placed 1 240000
## 4 66 62 Placed 1 250000
## 5 65 67 Placed 1 180000
## 6 70 53 Placed 1 300000
Create a table showing the mean salary of the males and females who were placed
# Mean salary of the placed students, one row per gender.
# The grouping list is unnamed, so the result columns are `Group.1`
# (the gender) and `x` (the mean salary).
mean_sal_gender <- aggregate(
  x = placed[["Salary"]],
  by = list(placed[["Gender"]]),
  FUN = mean
)
print(mean_sal_gender)
## Group.1 x
## 1 F 253068.0
## 2 M 284241.9
Plot box plots for each category to understand data distribution and identify outliers
# Box plot of salary by gender for the placed students, used to compare the
# two distributions and to spot outliers. One fill colour is supplied per
# box, in the order the gender groups are drawn.
boxplot(
  Salary ~ Gender,
  data = placed,
  main = "Salary of Males and Females who were placed",
  xlab = "Gender (Female/Male)",
  ylab = "Salary",
  col = c("green", "blue")
)
Take the log of Salary data to stabilize variance
# Natural log of the placed students' salaries, taken to stabilise variance
# before comparing the groups.
# This is a free-standing vector, not a column of `placed`; the formula
# below resolves it from the calling environment, while `Gender` is taken
# from `data`. The name is kept as-is because the t-test that follows refers
# to it.
log.transformed.Salary <- log(placed$Salary)

# Same box plot as for the raw salaries, now on the log scale.
boxplot(
  log.transformed.Salary ~ Gender,
  data = placed,
  main = "Salary of Males and Females who were placed",
  xlab = "Gender (Female/Male)",
  ylab = "Log Transformed Salary",
  col = c("green", "blue")
)
Run a t-test to test the following hypothesis: H1: The average salary of the male MBAs is higher than the average salary of female MBAs.
# Pooled-variance (Student) two-sample t-test of mean log salary by gender.
# NOTE(review): this is the two-sided test (the default, spelled out here),
# whereas the stated H1 is directional (males > females). The reported
# difference is F minus M, so the matching one-sided test would use
# alternative = "less" -- confirm which is intended before interpreting
# the p-value.
t.test(
  log.transformed.Salary ~ Gender,
  data = placed,
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: log.transformed.Salary by Gender
## t = -2.8142, df = 310, p-value = 0.005203
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.17482594 -0.03094897
## sample estimates:
## mean in group F mean in group M
## 12.40435 12.50723
Interpret the results