Import your data

# Read the salaries workbook into a tibble, then print it for a quick look.
myData <- read_excel(path = "../00_data/Salaries.xlsx")
myData
## # A tibble: 397 × 6
##    rank      discipline yrs.since.phd yrs.service sex    salary
##    <chr>     <chr>              <dbl>       <dbl> <chr>   <dbl>
##  1 Prof      B                     19          18 Male   139750
##  2 Prof      B                     20          16 Male   173200
##  3 AsstProf  B                      4           3 Male    79750
##  4 Prof      B                     45          39 Male   115000
##  5 Prof      B                     40          41 Male   141500
##  6 AssocProf B                      6           6 Male    97000
##  7 Prof      B                     30          23 Male   175000
##  8 Prof      B                     45          45 Male   147765
##  9 Prof      B                     21          20 Male   119250
## 10 Prof      B                     18          18 Female 129000
## # … with 387 more rows

Chapter 15

Create a factor

Modify factor order

Make two bar charts here - one before ordering, another after.

Unordered factor levels

# Transform data
# One row per sex holding the mean salary; NA salaries are dropped from the
# mean via na.rm = TRUE.
myDataTransofmed <- summarise(
    group_by(myData, sex),
    avg_salary = mean(salary, na.rm = TRUE)
)

myDataTransofmed
## # A tibble: 2 × 2
##   sex    avg_salary
##   <chr>       <dbl>
## 1 Female    101002.
## 2 Male      115090.
# Plot
# Dot plot of mean salary (x) against sex (y); sex is not reordered here.
ggplot(data = myDataTransofmed, mapping = aes(x = avg_salary, y = sex)) +
    geom_point()

Ordered factor levels

# Same dot plot, but the y axis uses sex reordered by its mean salary.
ggplot(
    data = myDataTransofmed,
    mapping = aes(x = avg_salary, y = fct_reorder(.f = sex, .x = avg_salary))
) +
    geom_point() +
    # Drop the y-axis title and give the x axis a readable one
    labs(y = NULL, x = "Mean Salary by sex")

Modify factor levels

Show examples of three functions:

  • fct_recode
  • fct_collapse
  • fct_lump
# List the distinct values of sex before modifying its levels.
distinct(myDataTransofmed, sex)
## # A tibble: 2 × 1
##   sex   
##   <chr> 
## 1 Female
## 2 Male
# Recode

myDataTransofmed %>%
    
    # Rename levels: fct_recode() takes new_name = "old_name" pairs, so the
    # existing level "Male" is renamed to "Man".
    mutate(sex_rev = fct_recode(sex, "Man" = "Male")) %>%
    select(sex, sex_rev) %>%
    # Filter on the original (unchanged) column to show the renamed row
    # side by side with its old value.
    filter(sex == "Male")
## # A tibble: 1 × 2
##   sex   sex_rev
##   <chr> <fct>  
## 1 Male  Man
# Collapse multiple levels into one
# Uses rank from the full data set because sex has only two values, which
# leaves nothing meaningful to merge. Every level named below exists in the
# data, so fct_collapse() no longer warns about unknown levels.

myData %>%
    
    mutate(
        rank_col = fct_collapse(rank, "Junior" = c("AsstProf", "AssocProf"))
    ) %>%
    # One row per original value, next to the level it was collapsed into
    distinct(rank, rank_col)
## # A tibble: 3 × 2
##   rank      rank_col
##   <chr>     <fct>   
## 1 Prof      Prof    
## 2 AsstProf  Junior  
## 3 AssocProf Junior
# Lump small levels into other levels
# Lumping is driven by how often each level occurs, so it is run on the full
# data set: in the per-sex summary table every level appears exactly once and
# nothing can be lumped.
myData %>% count(rank)
## # A tibble: 3 × 2
##   rank          n
##   <chr>     <int>
## 1 AssocProf    64
## 2 AsstProf     67
## 3 Prof        266
# With no n/prop given, fct_lump() merges the least frequent levels into
# "Other" as long as "Other" stays the smallest level.
myData %>% mutate(rank_lump = fct_lump(rank)) %>% distinct(rank_lump)
## # A tibble: 2 × 1
##   rank_lump
##   <fct>    
## 1 Prof     
## 2 Other

Chapter 16

No need to do anything here.