# Load the jobs-by-gender CSV from the TidyTuesday GitHub repository.
jobs_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
## Rows: 2088 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): occupation, major_category, minor_category
## dbl (9): year, total_workers, workers_male, workers_female, percent_female, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean of total_workers for each occupation, ignoring missing values.
jobs_gender %>%
  group_by(occupation) %>%
  summarise(all_workers = mean(total_workers, na.rm = TRUE))
## # A tibble: 522 × 2
## occupation all_workers
## <chr> <dbl>
## 1 Accountants and auditors 1608432
## 2 Actors 12641
## 3 Actuaries 24152.
## 4 Adhesive bonding machine operators and tenders 7328.
## 5 Administrative services managers 125857.
## 6 Advertising and promotions managers 43250.
## 7 Advertising sales agents 143603
## 8 Aerospace engineers 111415.
## 9 Agents and business managers of artists, performers, and athletes 29352.
## 10 Agricultural and food science technicians 28422.
## # … with 512 more rows
# Plot ----
# Baseline scatter of total_workers against occupation.
ggplot(data = jobs_gender, mapping = aes(x = occupation, y = total_workers)) +
  geom_point()
# Same scatter as above, but with the occupations ordered by total_workers
# (fct_reorder() summarises each level with the median by default).
# The reordered factor belongs on the occupation axis and total_workers must
# stay on the value axis; mapping the factor to y while x is still occupation
# plots occupation against itself and never draws the worker counts.
jobs_gender %>%
  ggplot(
    aes(
      x = fct_reorder(.f = occupation, .x = total_workers),
      y = total_workers
    )
  ) +
  geom_point() +
  # Keep a readable axis title instead of the raw fct_reorder() expression.
  labs(x = "occupation")
# One row per unique occupation value.
distinct(jobs_gender, occupation)
## # A tibble: 522 × 1
## occupation
## <chr>
## 1 Chief executives
## 2 General and operations managers
## 3 Legislators
## 4 Advertising and promotions managers
## 5 Marketing and sales managers
## 6 Public relations and fundraising managers
## 7 Administrative services managers
## 8 Computer and information systems managers
## 9 Financial managers
## 10 Compensation and benefits managers
## # … with 512 more rows
# Recode ----
# fct_recode() renames a level: the new name goes on the left-hand side,
# the existing level on the right-hand side.
jobs_gender %>%
  mutate(
    occupation_rev = fct_recode(occupation, Executives = "Chief executives")
  ) %>%
  # Show only the affected rows, with the original and renamed value side by
  # side.
  filter(occupation == "Chief executives") %>%
  select(occupation, occupation_rev)
## # A tibble: 4 × 2
## occupation occupation_rev
## <chr> <fct>
## 1 Chief executives Executives
## 2 Chief executives Executives
## 3 Chief executives Executives
## 4 Chief executives Executives
# Collapse multiple levels into one ----
# NOTE(review): "Executives" is not a level of `occupation` -- the recode
# example only created it in an unsaved `occupation_rev` column -- which is
# why fct_collapse() reports "Unknown levels". Confirm which second level was
# meant to be merged into "Jobs".
# NOTE(review): `occupation` never takes the value "Jobs", so the filter keeps
# every row; `occupation_col == "Jobs"` may have been intended -- confirm.
jobs_gender %>%
  mutate(
    occupation_col = fct_collapse(
      occupation,
      Jobs = c("Chief executives", "Executives")
    )
  ) %>%
  filter(occupation != "Jobs") %>%
  select(occupation, occupation_col)
## Warning: Unknown levels in `f`: Executives
## # A tibble: 2,088 × 2
## occupation occupation_col
## <chr> <fct>
## 1 Chief executives Jobs
## 2 General and operations managers General and operations managers
## 3 Legislators Legislators
## 4 Advertising and promotions managers Advertising and promotions managers
## 5 Marketing and sales managers Marketing and sales managers
## 6 Public relations and fundraising managers Public relations and fundraising m…
## 7 Administrative services managers Administrative services managers
## 8 Computer and information systems managers Computer and information systems m…
## 9 Financial managers Financial managers
## 10 Compensation and benefits managers Compensation and benefits managers
## # … with 2,078 more rows
# Lump small levels into other categories ----
# Number of rows per occupation, to see how frequent each level is.
count(jobs_gender, occupation)
## # A tibble: 522 × 2
## occupation n
## <chr> <int>
## 1 Accountants and auditors 4
## 2 Actors 4
## 3 Actuaries 4
## 4 Adhesive bonding machine operators and tenders 4
## 5 Administrative services managers 4
## 6 Advertising and promotions managers 4
## 7 Advertising sales agents 4
## 8 Aerospace engineers 4
## 9 Agents and business managers of artists, performers, and athletes 4
## 10 Agricultural and food science technicians 4
## # … with 512 more rows
# Lump the occupation levels with fct_lump() and list the resulting values.
# NOTE(review): the recorded output still shows 522 distinct values, i.e.
# nothing was lumped; the counts printed earlier suggest the levels are all
# equally frequent. An explicit threshold or weight is probably needed for
# this example to show any lumping -- confirm.
jobs_gender %>%
  mutate(occupation_lump = fct_lump(occupation)) %>%
  distinct(occupation_lump)
## # A tibble: 522 × 1
## occupation_lump
## <fct>
## 1 Chief executives
## 2 General and operations managers
## 3 Legislators
## 4 Advertising and promotions managers
## 5 Marketing and sales managers
## 6 Public relations and fundraising managers
## 7 Administrative services managers
## 8 Computer and information systems managers
## 9 Financial managers
## 10 Compensation and benefits managers
## # … with 512 more rows
Show examples of three functions:
No need to do anything here.