# Load the applicant records from the working directory.
# stringsAsFactors = TRUE is passed explicitly: the recorded output below shows
# Division, Gender and Hired as factors, which was the read.csv() default only
# before R 4.0.0. Without it, newer R versions read these columns as character
# and summary()/str() no longer match the transcript.
data <- read.csv(file = "data.csv", stringsAsFactors = TRUE)

# First six rows, to eyeball the column layout.
head(data)
## Applicant.ID Division Gender Hired
## 1 1 E Female N
## 2 2 A Male Y
## 3 3 F Male N
## 4 4 F Female N
## 5 5 E Female Y
## 6 6 A Male Y

# Per-column summary: quartiles for the numeric ID, level counts for factors.
summary(data)
## Applicant.ID Division Gender Hired
## Min. : 1 A:1202 Female:2365 N:3571
## 1st Qu.:1459 B: 754 Male :3468 Y:2262
## Median :2917 C:1183
## Mean :2917 D:1022
## 3rd Qu.:4375 E: 752
## Max. :5833 F: 920

# Column types and dimensions of the data frame.
str(data)
## 'data.frame': 5833 obs. of 4 variables:
## $ Applicant.ID: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Division : Factor w/ 6 levels "A","B","C","D",..: 5 1 6 6 5 1 3 2 3 6 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 2 2 1 1 2 1 2 1 1 ...
## $ Hired : Factor w/ 2 levels "N","Y": 1 2 1 1 2 2 1 1 1 1 ...
# Cross-tabulate gender (rows) against hiring outcome (columns).
data.tab <- table(data[["Gender"]], data[["Hired"]])
data.tab
##
## N Y
## Female 1647 718
## Male 1924 1544

# Structure of the table object: a 2 x 2 integer table with unnamed dimensions.
str(data.tab)
## 'table' int [1:2, 1:2] 1647 1924 718 1544
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:2] "Female" "Male"
## ..$ : chr [1:2] "N" "Y"

# The table has only two rows, so head() shows all of it.
head(data.tab)
##
## N Y
## Female 1647 718
## Male 1924 1544

# Counts with a per-row total appended as a "Sum" column.
addmargins(data.tab, margin = 2)
##
## N Y Sum
## Female 1647 718 2365
## Male 1924 1544 3468

# Row-wise proportions: share of each gender not hired (N) and hired (Y).
prop.table(data.tab, margin = 1)
##
## N Y
## Female 0.6964059 0.3035941
## Male 0.5547866 0.4452134

# Counts (with row totals) and row proportions side by side.
cbind(
  addmargins(data.tab, margin = 2),
  prop.table(data.tab, margin = 1)
)
## N Y Sum N Y
## Female 1647 718 2365 0.6964059 0.3035941
## Male 1924 1544 3468 0.5547866 0.4452134
# Same Gender x Hired cross-tabulation, built with the formula interface so
# that the dimensions carry their variable names.
data2 <- xtabs(formula = ~ Gender + Hired, data = data)
head(data2)
## Hired
## Gender N Y
## Female 1647 718
## Male 1924 1544

# vcd supplies assocstats() for association measures on contingency tables.
library(vcd)
## Warning: package 'vcd' was built under R version 3.6.2
## Loading required package: grid

# Chi-square statistics plus Phi, contingency coefficient and Cramer's V.
# The P(> X^2) column prints as 0 because of rounding; summary() below
# reports the Pearson p-value as 1.167e-27.
assocstats(x = data2)
## X^2 df P(> X^2)
## Likelihood Ratio 120.39 1 0
## Pearson 118.79 1 0
##
## Phi-Coefficient : 0.143
## Contingency Coeff.: 0.141
## Cramer's V : 0.143

# Chi-square test of independence between Gender and Hired.
summary(object = data2)
## Call: xtabs(formula = ~Gender + Hired, data = data)
## Number of cases in table: 5833
## Number of factors: 2
## Test for independence of all factors:
## Chisq = 118.79, df = 1, p-value = 1.167e-27