Import your data

# Load the TidyTuesday (2019-03-05) jobs_gender CSV straight from GitHub.
jobs_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
## Rows: 2088 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): occupation, major_category, minor_category
## dbl (9): year, total_workers, workers_male, workers_female, percent_female, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

Modify factor order

# Mean of total_workers per occupation, ignoring missing values.
jobs_gender %>%
  group_by(occupation) %>%
  summarise(all_workers = mean(total_workers, na.rm = TRUE))
## # A tibble: 522 × 2
##    occupation                                                        all_workers
##    <chr>                                                                   <dbl>
##  1 Accountants and auditors                                             1608432 
##  2 Actors                                                                 12641 
##  3 Actuaries                                                              24152.
##  4 Adhesive bonding machine operators and tenders                          7328.
##  5 Administrative services managers                                      125857.
##  6 Advertising and promotions managers                                    43250.
##  7 Advertising sales agents                                              143603 
##  8 Aerospace engineers                                                   111415.
##  9 Agents and business managers of artists, performers, and athletes      29352.
## 10 Agricultural and food science technicians                              28422.
## # … with 512 more rows
# Baseline scatter plot: occupation in its default order against total_workers.
ggplot(jobs_gender, aes(x = occupation, y = total_workers)) +
  geom_point()

# Same data with occupation reordered by total_workers.
# The original mapped occupation to BOTH axes (x = occupation,
# y = fct_reorder(occupation, ...)), so total_workers was never drawn.
# Put the numeric variable on x and the reordered factor on y.
# fct_reorder() orders levels by the median of .x within each level by default.
jobs_gender %>%
  ggplot(
    aes(
      x = total_workers,
      y = fct_reorder(.f = occupation, .x = total_workers)
    )
  ) +
  geom_point()

Modify factor levels

# List each occupation once, to see which levels are available.
distinct(jobs_gender, occupation)
## # A tibble: 522 × 1
##    occupation                               
##    <chr>                                    
##  1 Chief executives                         
##  2 General and operations managers          
##  3 Legislators                              
##  4 Advertising and promotions managers      
##  5 Marketing and sales managers             
##  6 Public relations and fundraising managers
##  7 Administrative services managers         
##  8 Computer and information systems managers
##  9 Financial managers                       
## 10 Compensation and benefits managers       
## # … with 512 more rows
# Recode: rename the level "Chief executives" to "Executives" in a new factor
# column, then show only the affected rows next to the original values.
jobs_gender %>%
  mutate(
    occupation_rev = fct_recode(occupation, "Executives" = "Chief executives")
  ) %>%
  select(occupation, occupation_rev) %>%
  filter(occupation == "Chief executives")
## # A tibble: 4 × 2
##   occupation       occupation_rev
##   <chr>            <fct>         
## 1 Chief executives Executives    
## 2 Chief executives Executives    
## 3 Chief executives Executives    
## 4 Chief executives Executives
# Collapse multiple levels into one.
# Two fixes relative to the original:
#   * "Executives" is not a level of `occupation` (it only existed in the
#     recoded column of the previous example), which caused the
#     "Unknown levels in `f`" warning. Collapse two levels that exist in the
#     data instead.
#   * The filter compared the ORIGINAL column with "Jobs", which is always
#     TRUE, so every row was returned. Filter on the collapsed column so only
#     the rows that were merged into "Jobs" are shown.
jobs_gender %>%
  mutate(
    occupation_col = fct_collapse(
      occupation,
      "Jobs" = c("Chief executives", "General and operations managers")
    )
  ) %>%
  select(occupation, occupation_col) %>%
  filter(occupation_col == "Jobs")
## Warning: Unknown levels in `f`: Executives
## # A tibble: 2,088 × 2
##    occupation                                occupation_col                     
##    <chr>                                     <fct>                              
##  1 Chief executives                          Jobs                               
##  2 General and operations managers           General and operations managers    
##  3 Legislators                               Legislators                        
##  4 Advertising and promotions managers       Advertising and promotions managers
##  5 Marketing and sales managers              Marketing and sales managers       
##  6 Public relations and fundraising managers Public relations and fundraising m…
##  7 Administrative services managers          Administrative services managers   
##  8 Computer and information systems managers Computer and information systems m…
##  9 Financial managers                        Financial managers                 
## 10 Compensation and benefits managers        Compensation and benefits managers 
## # … with 2,078 more rows
# Lump small levels into other categories.
# First inspect how many rows each occupation has.
count(jobs_gender, occupation)
## # A tibble: 522 × 2
##    occupation                                                            n
##    <chr>                                                             <int>
##  1 Accountants and auditors                                              4
##  2 Actors                                                                4
##  3 Actuaries                                                             4
##  4 Adhesive bonding machine operators and tenders                        4
##  5 Administrative services managers                                      4
##  6 Advertising and promotions managers                                   4
##  7 Advertising sales agents                                              4
##  8 Aerospace engineers                                                   4
##  9 Agents and business managers of artists, performers, and athletes     4
## 10 Agricultural and food science technicians                             4
## # … with 512 more rows
# Every occupation appears the same number of times (n = 4), so the default
# fct_lump(occupation) has no "small" level to lump and all 522 levels survive.
# Weight each row by total_workers and keep the 10 largest occupations;
# the rest are lumped into "Other".
# NOTE(review): `w` must be non-missing; assumes total_workers has no NA.
jobs_gender %>%
  mutate(occupation_lump = fct_lump(occupation, n = 10, w = total_workers)) %>%
  distinct(occupation_lump)
## # A tibble: 522 × 1
##    occupation_lump                          
##    <fct>                                    
##  1 Chief executives                         
##  2 General and operations managers          
##  3 Legislators                              
##  4 Advertising and promotions managers      
##  5 Marketing and sales managers             
##  6 Public relations and fundraising managers
##  7 Administrative services managers         
##  8 Computer and information systems managers
##  9 Financial managers                       
## 10 Compensation and benefits managers       
## # … with 512 more rows

Show examples of three functions:

  • fct_recode
  • fct_collapse
  • fct_lump

Chapter 16

No need to do anything here.