Yes ## Q2. According to your answer in Q1, would it be appropriate to apply the findings back to the population as a whole? It would not be appropriate.
4526
1835
# Load packages
library(dplyr)
# Read the admissions data. stringsAsFactors = TRUE is set explicitly because
# R >= 4.0.0 defaults it to FALSE, which would leave Admit and Gender as
# character columns rather than the factors shown in the str() output below.
ucb_admit <- read.csv(
  "/resources/rstudio/BusinessStatistics/Data/ucb_admit.csv",
  stringsAsFactors = TRUE
)
# Make sure Dept is a factor regardless of how it was parsed
ucb_admit$Dept <- as.factor(ucb_admit$Dept)
# Inspect dimensions and column types
str(ucb_admit)
## 'data.frame': 4526 obs. of 3 variables:
## $ Admit : Factor w/ 2 levels "Admitted","Rejected": 1 1 1 1 1 1 1 1 1 1 ...
## $ Gender: Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Dept : Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
# Summarise every column of the data frame
summary(ucb_admit)
## Admit Gender Dept
## Admitted:1755 Female:1835 A:933
## Rejected:2771 Male :2691 B:585
## C:918
## D:792
## E:584
## F:714
# Preview the first six rows of the data
head(ucb_admit)
## Admit Gender Dept
## 1 Admitted Male A
## 2 Admitted Male A
## 3 Admitted Male A
## 4 Admitted Male A
## 5 Admitted Male A
## 6 Admitted Male A
1835
# Count applicants by department and admission outcome
ucb_admit %>%
  count(Dept, Admit)
## # A tibble: 12 x 3
## Dept Admit n
## <fct> <fct> <int>
## 1 A Admitted 601
## 2 A Rejected 332
## 3 B Admitted 370
## 4 B Rejected 215
## 5 C Admitted 322
## 6 C Rejected 596
## 7 D Admitted 269
## 8 D Rejected 523
## 9 E Admitted 147
## 10 E Rejected 437
## 11 F Admitted 46
## 12 F Rejected 668
69.6
# Proportion of female applicants admitted vs. rejected
ucb_admit %>%
  # Tally applicants for every Gender x Admit combination
  count(Gender, Admit) %>%
  # Group by gender so proportions are computed within each gender
  group_by(Gender) %>%
  # Share of each admission outcome within the gender
  mutate(prop = n / sum(n)) %>%
  # Keep only the rows for female applicants (both outcomes)
  filter(Gender == "Female")
## # A tibble: 2 x 4
## # Groups: Gender [1]
## Gender Admit n prop
## <fct> <fct> <int> <dbl>
## 1 Female Admitted 557 0.304
## 2 Female Rejected 1278 0.696
Stratified Sampling