Import your data

# Load the workbook from the shared data folder, then show a preview.
data <- read_excel(path = "../00_data/my_data.xlsx")
data
## # A tibble: 450 × 5
##    service component severity         diagnosed  year
##    <chr>   <chr>     <chr>            <chr>     <dbl>
##  1 Army    Active    Penetrating      189        2006
##  2 Army    Active    Severe           102        2006
##  3 Army    Active    Moderate         709        2006
##  4 Army    Active    Mild             5896       2006
##  5 Army    Active    Not Classifiable 122        2006
##  6 Army    Guard     Penetrating      33         2006
##  7 Army    Guard     Severe           26         2006
##  8 Army    Guard     Moderate         177        2006
##  9 Army    Guard     Mild             1332       2006
## 10 Army    Guard     Not Classifiable 29         2006
## # ℹ 440 more rows

Chapter 15

Create a factor

# Number of rows recorded for each service branch.
count(data, service)
## # A tibble: 4 × 2
##   service       n
##   <chr>     <int>
## 1 Air Force   135
## 2 Army        135
## 3 Marines      90
## 4 Navy         90
# Explicit level order for the service branches (instead of alphabetical).
service_levels <- c("Navy", "Army", "Marines", "Air Force")

# Same data, with `service` stored as a factor using the order above.
data_rev <- data %>%
    mutate(service = factor(service, levels = service_levels))

Modify factor order

Make two bar charts here: one before ordering and another after.

# Average year per service; rows come back in the factor's level order.
data_rev %>%
    group_by(service) %>%
    summarise(year = mean(year, na.rm = TRUE))
## # A tibble: 4 × 2
##   service    year
##   <fct>     <dbl>
## 1 Navy       2010
## 2 Army       2010
## 3 Marines    2010
## 4 Air Force  2010
# Preview the data now that `service` is a factor.
data_rev
## # A tibble: 450 × 5
##    service component severity         diagnosed  year
##    <fct>   <chr>     <chr>            <chr>     <dbl>
##  1 Army    Active    Penetrating      189        2006
##  2 Army    Active    Severe           102        2006
##  3 Army    Active    Moderate         709        2006
##  4 Army    Active    Mild             5896       2006
##  5 Army    Active    Not Classifiable 122        2006
##  6 Army    Guard     Penetrating      33         2006
##  7 Army    Guard     Severe           26         2006
##  8 Army    Guard     Moderate         177        2006
##  9 Army    Guard     Mild             1332       2006
## 10 Army    Guard     Not Classifiable 29         2006
## # ℹ 440 more rows
# Total diagnoses per service. The year summary is identical for every
# service, so reordering by year cannot change the plot; use the diagnosis
# totals instead. `diagnosed` is read in as text, so it is converted first.
# NOTE(review): any non-numeric entries become NA (with a coercion warning)
# and are dropped by na.rm = TRUE -- confirm that is acceptable.
diagnosed_by_service <- data_rev %>%
    group_by(service) %>%
    summarise(diagnosed = sum(as.numeric(diagnosed), na.rm = TRUE))

# Before ordering: bars follow the factor's existing level order.
ggplot(diagnosed_by_service, aes(diagnosed, service)) +
    geom_col()

# After ordering: bars sorted by total diagnoses.
ggplot(
    diagnosed_by_service,
    aes(diagnosed, fct_reorder(service, diagnosed))
) +
    geom_col()

Modify factor levels

Show examples of three functions:

  • fct_recode
# Rename every level by prefixing "US" (new name on the left, old on the
# right), then count rows per renamed level.
data_rev %>%
    mutate(
        service = service %>%
            fct_recode(
                "USNavy"      = "Navy",
                "USArmy"      = "Army",
                "USMarines"   = "Marines",
                "USAir Force" = "Air Force"
            )
    ) %>%
    count(service)
## # A tibble: 4 × 2
##   service         n
##   <fct>       <int>
## 1 USNavy         90
## 2 USArmy        135
## 3 USMarines      90
## 4 USAir Force   135
  • fct_collapse
# Rename Navy and collapse the three remaining branches into one "Other"
# level. The names on the right must be levels that actually exist in
# `service`; the recoded "US..." names were never saved back to the data.
data %>%
    mutate(
        service = fct_collapse(
            service,
            USNavy = "Navy",
            Other  = c("Army", "Marines", "Air Force")
        )
    ) %>%
    count(service)
## # A tibble: 2 × 2
##   service     n
##   <fct>   <int>
## 1 Other     360
## 2 USNavy     90
  • fct_lump
# Keep the two most frequent branches and lump the rest into "Other".
# Called without `n` or `prop`, fct_lump() leaves these four similarly sized
# groups untouched, so `n` is given explicitly. The result overwrites
# `service` so that count() reports the lumped levels.
data %>%
    mutate(service = fct_lump(service, n = 2)) %>%
    count(service)
## # A tibble: 3 × 2
##   service       n
##   <fct>     <int>
## 1 Air Force   135
## 2 Army        135
## 3 Other       180

Chapter 16

No need to do anything here.