salary <- read_excel("../00_data/Salaries.xlsx")
rank_sex_counts <- salary %>%
count(rank, sex)
rank_sex_counts
## # A tibble: 6 × 3
## rank sex n
## <chr> <chr> <int>
## 1 AssocProf Female 10
## 2 AssocProf Male 54
## 3 AsstProf Female 11
## 4 AsstProf Male 56
## 5 Prof Female 18
## 6 Prof Male 248
rank_sex_wide <- rank_sex_counts %>%
pivot_wider(
names_from = sex, # new column names
values_from = n # values to fill
)
rank_sex_wide
## # A tibble: 3 × 3
## rank Female Male
## <chr> <int> <int>
## 1 AssocProf 10 54
## 2 AsstProf 11 56
## 3 Prof 18 248
rank_summary_wide <- salary %>%
group_by(rank) %>%
summarise(
mean_salary = mean(salary, na.rm = TRUE),
median_salary = median(salary, na.rm = TRUE),
.groups = "drop"
)
rank_summary_wide
## # A tibble: 3 × 3
## rank mean_salary median_salary
## <chr> <dbl> <dbl>
## 1 AssocProf 93876. 95626.
## 2 AsstProf 80776. 79800
## 3 Prof 126772. 123322.
rank_summary_long <- rank_summary_wide %>%
pivot_longer(
cols = c(mean_salary, median_salary),
names_to = "stat",
values_to = "value"
)
rank_summary_long
## # A tibble: 6 × 3
## rank stat value
## <chr> <chr> <dbl>
## 1 AssocProf mean_salary 93876.
## 2 AssocProf median_salary 95626.
## 3 AsstProf mean_salary 80776.
## 4 AsstProf median_salary 79800
## 5 Prof mean_salary 126772.
## 6 Prof median_salary 123322.
salary_sep <- salary %>%
mutate(rank_sex = paste(rank, sex, sep = "/"))
salary_sep %>%
select(rank, sex, rank_sex) %>%
head()
## # A tibble: 6 × 3
## rank sex rank_sex
## <chr> <chr> <chr>
## 1 Prof Male Prof/Male
## 2 Prof Male Prof/Male
## 3 AsstProf Male AsstProf/Male
## 4 Prof Male Prof/Male
## 5 Prof Male Prof/Male
## 6 AssocProf Male AssocProf/Male
salary_separated <- salary_sep %>%
separate(
col = rank_sex,
into = c("rank2", "sex2"),
sep = "/"
)
salary_separated %>%
select(rank, sex, rank2, sex2) %>%
head()
## # A tibble: 6 × 4
## rank sex rank2 sex2
## <chr> <chr> <chr> <chr>
## 1 Prof Male Prof Male
## 2 Prof Male Prof Male
## 3 AsstProf Male AsstProf Male
## 4 Prof Male Prof Male
## 5 Prof Male Prof Male
## 6 AssocProf Male AssocProf Male
salary_united <- salary %>%
unite(
col = rank_sex,
rank, sex,
sep = "/",
remove = FALSE # keep original columns too
)
salary_united %>%
select(rank, sex, rank_sex) %>%
head()
## # A tibble: 6 × 3
## rank sex rank_sex
## <chr> <chr> <chr>
## 1 Prof Male Prof/Male
## 2 Prof Male Prof/Male
## 3 AsstProf Male AsstProf/Male
## 4 Prof Male Prof/Male
## 5 Prof Male Prof/Male
## 6 AssocProf Male AssocProf/Male
colSums(is.na(salary))
## rank discipline yrs.since.phd yrs.service sex
## 0 0 0 0 0
## salary
## 0