yes
It would not be appropriate to apply the findings back to the population as a whole.
4,526
1,835
# Load packages
library(dplyr)
# Read the admissions data. stringsAsFactors = TRUE is spelled out because
# R >= 4.0.0 no longer converts strings to factors by default, and the
# str()/summary() output recorded below shows Admit and Gender as factors.
ucb_admit <- read.csv(
  "/resources/rstudio/BusinessStatistics/data/ucb_admit.csv",
  stringsAsFactors = TRUE
)
# Kept so Dept is a factor regardless of how read.csv() parsed the column
ucb_admit$Dept <- as.factor(ucb_admit$Dept)
# Inspect the structure: number of rows, column types and factor levels
str(ucb_admit)
## 'data.frame': 4526 obs. of 3 variables:
## $ Admit : Factor w/ 2 levels "Admitted","Rejected": 1 1 1 1 1 1 1 1 1 1 ...
## $ Gender: Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Dept : Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
# Summarise every column of ucb_admit (for factor columns: counts per level)
summary(ucb_admit)
## Admit Gender Dept
## Admitted:1755 Female:1835 A:933
## Rejected:2771 Male :2691 B:585
## C:918
## D:792
## E:584
## F:714
# Preview the first rows of ucb_admit
head(ucb_admit)
## Admit Gender Dept
## 1 Admitted Male A
## 2 Admitted Male A
## 3 Admitted Male A
## 4 Admitted Male A
## 5 Admitted Male A
## 6 Admitted Male A
89
# Tally applicants for every combination of department, admission outcome
# and gender
count(ucb_admit, Dept, Admit, Gender)
## # A tibble: 24 x 4
## Dept Admit Gender n
## <fct> <fct> <fct> <int>
## 1 A Admitted Female 89
## 2 A Admitted Male 512
## 3 A Rejected Female 19
## 4 A Rejected Male 313
## 5 B Admitted Female 17
## 6 B Admitted Male 353
## 7 B Rejected Female 8
## 8 B Rejected Male 207
## 9 C Admitted Female 202
## 10 C Admitted Male 120
## # ... with 14 more rows
69.6%
# Within each gender, compute the share of admitted vs. rejected applicants,
# then show only the rows for female applicants
ucb_admit %>%
count(Gender, Admit) %>%
# Group by gender so that sum(n) below is taken per gender
group_by(Gender) %>%
# prop = this outcome's count divided by the gender's total count
mutate(prop = n / sum(n)) %>%
# Keep only the rows for female applicants (both admission outcomes)
filter(Gender == "Female")
## # A tibble: 2 x 4
## # Groups: Gender [1]
## Gender Admit n prop
## <fct> <fct> <int> <dbl>
## 1 Female Admitted 557 0.304
## 2 Female Rejected 1278 0.696
Stratified Sampling