Import your data

# Read the data set from the Excel workbook into `data`.
data <- read_excel(path = "../00_data/myData.xlsx")

Chapter 15

Create a factor

# Keep the three columns of interest, then draw a random sample of 20 rows.
data_small <- data %>%
    select(clean_name, type, peak_billboard_position) %>%
    sample_n(20)

data_small
## # A tibble: 20 × 3
##    clean_name             type   peak_billboard_position
##    <chr>                  <chr>                    <dbl>
##  1 Alice Cooper           Studio                      35
##  2 Television             Studio                     201
##  3 U2                     Studio                       1
##  4 The Cure               Studio                      12
##  5 Elvis Costello         Studio                      30
##  6 Cheap Trick            Studio                      73
##  7 Motörhead              Studio                     201
##  8 The Velvet Underground Studio                     199
##  9 Eagles                 Studio                       1
## 10 X                      Studio                     201
## 11 Portishead             Studio                      79
## 12 Yes                    Studio                       3
## 13 Nirvana                Live                         1
## 14 The Notorious B.I.G.   Studio                       1
## 15 Minutemen              Studio                     201
## 16 The Velvet Underground Studio                     201
## 17 Love                   Studio                     154
## 18 The Go-Go's            Studio                       1
## 19 Arctic Monkeys         Studio                       6
## 20 Fiona Apple            Studio                       7

Modify factor order

Make two bar charts here: one before ordering and another after.

# Before ordering
# Mean peak Billboard position for each `type` (one row per type).
Peak_position_summary <- data_small %>%
    group_by(type) %>%
    summarise(peak_billboard_position = mean(peak_billboard_position, na.rm = TRUE))

# geom_col() draws one bar per row with its length taken from the summarised
# value, giving the bar chart the exercise asks for.
ggplot(Peak_position_summary, aes(peak_billboard_position, type)) +
    geom_col()

# After ordering
# fct_reorder() sorts the levels of `type` by their mean peak position, so the
# bars appear in order of that value.
ggplot(
    Peak_position_summary,
    aes(peak_billboard_position, fct_reorder(type, peak_billboard_position))
) +
    geom_col() +
    # Replace the default axis title (the full fct_reorder() expression).
    labs(y = "type")

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename the "Studio" level to "Studio recorded" and show the
# recoded column next to the original for the affected rows.
data %>%
    select(type) %>%
    mutate(type_rev = fct_recode(type, "Studio recorded" = "Studio")) %>%
    filter(type == "Studio")
## # A tibble: 608 × 2
##    type   type_rev       
##    <chr>  <fct>          
##  1 Studio Studio recorded
##  2 Studio Studio recorded
##  3 Studio Studio recorded
##  4 Studio Studio recorded
##  5 Studio Studio recorded
##  6 Studio Studio recorded
##  7 Studio Studio recorded
##  8 Studio Studio recorded
##  9 Studio Studio recorded
## 10 Studio Studio recorded
## # ℹ 598 more rows
  • fct_collapse
# fct_collapse(): map the old level "Studio" onto the new level
# "Studio record". Each old level only needs to be listed once.
# NOTE(review): with a single old level this behaves like fct_recode(); list
# several old levels in the vector to demonstrate an actual collapse.
data %>%
    mutate(type_col = fct_collapse(type, "Studio record" = "Studio")) %>%
    select(type, type_col) %>%
    # Drop the Compilation and Live rows before sampling.
    filter(type != "Compilation", type != "Live") %>%
    sample_n(10)
## # A tibble: 10 × 2
##    type          type_col     
##    <chr>         <fct>        
##  1 Studio        Studio record
##  2 Studio        Studio record
##  3 Studio        Studio record
##  4 Studio        Studio record
##  5 Studio        Studio record
##  6 Studio        Studio record
##  7 Studio        Studio record
##  8 Studio        Studio record
##  9 Studio        Studio record
## 10 Greatest Hits Greatest Hits
  • fct_lump
# Tabulate how many rows fall under each `type` before lumping.
count(data, type)
## # A tibble: 5 × 2
##   type              n
##   <chr>         <int>
## 1 Compilation      38
## 2 Greatest Hits    23
## 3 Live             19
## 4 Soundtrack        3
## 5 Studio          608
    # fct_lump(): keep the two most frequent levels of `type`, pool the rest
    # into "Other", then list the levels that remain.
    data %>%
        mutate(type_lump = fct_lump(type, n = 2)) %>%
        distinct(type_lump)
## # A tibble: 3 × 1
##   type_lump  
##   <fct>      
## 1 Studio     
## 2 Compilation
## 3 Other

Chapter 16

No need to do anything here.