Import your data

# Read the Excel workbook into a tibble, then print a preview of it
data <- read_excel(path = "Salaries.xlsx")
data
## # A tibble: 397 × 6
##    rank      discipline yrs.since.phd yrs.service sex    salary
##    <chr>     <chr>              <dbl>       <dbl> <chr>   <dbl>
##  1 Prof      B                     19          18 Male   139750
##  2 Prof      B                     20          16 Male   173200
##  3 AsstProf  B                      4           3 Male    79750
##  4 Prof      B                     45          39 Male   115000
##  5 Prof      B                     40          41 Male   141500
##  6 AssocProf B                      6           6 Male    97000
##  7 Prof      B                     30          23 Male   175000
##  8 Prof      B                     45          45 Male   147765
##  9 Prof      B                     21          20 Male   119250
## 10 Prof      B                     18          18 Female 129000
## # … with 387 more rows

Chapter 15

Create a factor

# rank is a categorical variable stored as character (Prof, AssocProf and
# AsstProf are visible in the preview), so convert it to a factor.
# mutate() keeps every other column; the sections below still need the
# row-level salary and yrs.service values.
data <- data %>%
    mutate(rank = factor(rank))

Modify factor order

Make two bar charts here - one before ordering another after

# Transform data: calculate average salary and average years of service by rank.
# Column names are unquoted so dplyr reads the columns instead of treating
# them as constant strings. The summary is stored in its own object so that
# `data` keeps the individual records.
rank_summary <- data %>%
    group_by(rank) %>%
    summarise(
        avg_salary      = mean(salary, na.rm = TRUE),
        avg_yrs_service = mean(yrs.service, na.rm = TRUE)
    )
rank_summary

# Plot before ordering: average salary by rank, levels in their default order
rank_summary %>%
    ggplot(aes(x = avg_salary, y = rank)) +
    geom_col()

# Plot Before Ordering: average years of service by rank, default level order
rank_summary %>%
    ggplot(aes(x = avg_yrs_service, y = rank)) +
    geom_col()

# Plot After Ordering: rank levels sorted by average years of service
rank_summary %>%
    ggplot(aes(
        x = avg_yrs_service,
        y = fct_reorder(.f = rank, .x = avg_yrs_service)
    )) +
    geom_col() +
    # Labeling
    labs(y = NULL, x = "Average Years of Service")

Modify factor levels

Show examples of three functions:

# fct_recode: rename individual levels ("new name" = "old name")
data %>%
    mutate(
        rank_long = fct_recode(
            rank,
            "Assistant Professor" = "AsstProf",
            "Associate Professor" = "AssocProf",
            "Professor"           = "Prof"
        )
    ) %>%
    distinct(rank, rank_long)

# fct_collapse: merge several existing levels into a single new level
data %>%
    mutate(
        rank_group = fct_collapse(
            rank,
            "Asst/Assoc" = c("AsstProf", "AssocProf")
        )
    ) %>%
    distinct(rank, rank_group)

# fct_lump: pool the least frequent levels into "Other"
data %>%
    mutate(rank_lumped = fct_lump(rank)) %>%
    count(rank_lumped)

Chapter 16

No need to do anything here.