# Import the salary data set; readr guesses the column types and reports them.
data <- readr::read_csv(file = "../00_data/data/Salaries.csv")
## Rows: 397 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): rank, discipline, sex
## dbl (3): yrs.since.phd, yrs.service, salary
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Make two bar charts here: one before ordering the factor levels and another after.
# Mean salary for each academic rank (one row per rank); missing salaries are
# ignored when averaging.
salary_by_rank <- data %>%
  group_by(rank) %>%
  summarise(avg_salary = mean(salary, na.rm = TRUE))
# Bar chart 1: ranks in their default order. `rank` is a character column,
# so ggplot2 places its values alphabetically on the axis.
salary_by_rank %>%
  ggplot(aes(x = avg_salary, y = rank)) +
  geom_col() +
  labs(y = "Rank", x = "Mean Salary")

# Bar chart 2: the same data with ranks reordered by mean salary, so the bars
# run from the lowest-paid rank (bottom) to the highest-paid rank (top).
salary_by_rank %>%
  ggplot(aes(x = avg_salary, y = fct_reorder(.f = rank, .x = avg_salary))) +
  geom_col() +
  labs(y = "Rank", x = "Mean Salary")
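Show examples of three forcats functions: one that recodes levels, one that collapses levels, and one that lumps rare levels together.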
# fct_recode(): rename factor levels by hand ("Male" -> "M", "Female" -> "F").
# The original column is kept next to the recoded one for comparison.
data %>%
  select(sex) %>%
  mutate(sex_rev = fct_recode(sex, "M" = "Male", "F" = "Female"))
## # A tibble: 397 × 2
## sex sex_rev
## <chr> <fct>
## 1 Male M
## 2 Male M
## 3 Male M
## 4 Male M
## 5 Male M
## 6 Male M
## 7 Male M
## 8 Male M
## 9 Male M
## 10 Female F
## # ℹ 387 more rows
# fct_collapse(): merge several levels into one. Both non-full-professor ranks
# become "Other"; the original column is kept alongside for comparison.
data %>%
  select(rank) %>%
  mutate(rank_col = fct_collapse(rank, "Other" = c("AssocProf", "AsstProf")))
## # A tibble: 397 × 2
## rank rank_col
## <chr> <fct>
## 1 Prof Prof
## 2 Prof Prof
## 3 AsstProf Other
## 4 Prof Prof
## 5 Prof Prof
## 6 AssocProf Other
## 7 Prof Prof
## 8 Prof Prof
## 9 Prof Prof
## 10 Prof Prof
## # ℹ 387 more rows
# Lump the least frequent ranks into "Other". fct_lump() with no `n` or `prop`
# dispatches to fct_lump_lowfreq(), so calling it directly gives the same
# result while using the variant forcats recommends for new code.
data %>%
  mutate(rank_lump = fct_lump_lowfreq(rank)) %>%
  distinct(rank_lump)
## # A tibble: 2 × 1
## rank_lump
## <fct>
## 1 Prof
## 2 Other
No need to do anything here.