# Load the Dean's Dilemma placement data and compare the mean salary of
# female and male students among those who were placed.

# Directory that holds the input CSV. The path is built explicitly with
# file.path() instead of calling setwd(), so the session's working directory
# is left untouched.
data_dir <- "C:/Users/harsh/Desktop/r"
mba.df <- read.csv(
  file.path(data_dir, "Data - Deans Dilemma.csv"),
  header = TRUE,
  sep = ","
)

# View() opens a GUI data viewer; only call it when a user is present.
if (interactive()) {
  View(mba.df)
}

# Keep only the rows whose Placement column equals "Placed".
placed.df <- subset(mba.df, Placement == "Placed")
if (interactive()) {
  View(placed.df)
}

# Mean of Salary within each Gender group of the placed students.
by(placed.df$Salary, placed.df$Gender, mean)
## placed.df$Gender: F
## [1] 253068
## --------------------------------------------------------
## placed.df$Gender: M
## [1] 284241.9
# Average male salary = 284241.9; average female salary = 253068.
# In this sample the mean male salary is higher than the mean female salary,
# which suggests that a gender gap exists among placed students.
# Welch two-sample t-test: does mean Salary differ between the Gender groups
# of the placed students?
t.test(
  formula = Salary ~ Gender,
  data = placed.df
)
##
## Welch Two Sample t-test
##
## data: Salary by Gender
## t = -3.0757, df = 243.03, p-value = 0.00234
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -51138.42 -11209.22
## sample estimates:
## mean in group F mean in group M
## 253068.0 284241.9
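# p-value = 0.00234. Because the p-value is below 0.05, we reject the null
# hypothesis that the mean salaries of male and female students are equal.
# Among students who were placed, the mean male salary is significantly
# higher than the mean female salary, so there is evidence of a gender gap.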