Import your data

data <- read_xlsx("../00_data/data/myData.xlsx")

Chapter 15

Create a factor

data %>% count(type)
## # A tibble: 5 × 2
##   type              n
##   <chr>         <int>
## 1 Compilation      15
## 2 Greatest Hits    12
## 3 Live             12
## 4 Soundtrack        3
## 5 Studio          458
type_levels <- c("Studio", "Compilation", "Greatest Hits", "Live", "Soundtrack")

data_rev <- data %>%
    mutate(type = type %>% factor(levels = type_levels))

data_rev %>% count(type)
## # A tibble: 5 × 2
##   type              n
##   <fct>         <int>
## 1 Studio          458
## 2 Compilation      15
## 3 Greatest Hits    12
## 4 Live             12
## 5 Soundtrack        3

Modify factor order

Make two bar charts here - one before ordering another after

data_summary <- data %>%
  group_by(type) %>%
  summarise(
    peak_billboard_position = mean(peak_billboard_position, na.rm = TRUE),
  )

data_summary
## # A tibble: 5 × 2
##   type          peak_billboard_position
##   <chr>                           <dbl>
## 1 Compilation                     179  
## 2 Greatest Hits                   118. 
## 3 Live                             43.2
## 4 Soundtrack                       47.3
## 5 Studio                           50.3
ggplot(data_summary, aes(peak_billboard_position, type)) + geom_point()

ggplot(data_summary, aes(peak_billboard_position, fct_reorder(type, peak_billboard_position))) + geom_point()

Modify factor levels

Show examples of three functions:

fct_recode

data %>%
    mutate(type = fct_recode(type,
                             "Compilation of Artists Songs" = "Compilation",
                             "Greatest Songs of an Era" = "Greatest Hits",
                             "Live Recording of an Artist" = "Live",
                             "A movie/show/performance Soundtrack" = "Soundtrack",
                             "A true Studio Album from an Artist/Group" = "Studio")) %>%
    count(type)
## # A tibble: 5 × 2
##   type                                         n
##   <fct>                                    <int>
## 1 Compilation of Artists Songs                15
## 2 Greatest Songs of an Era                    12
## 3 Live Recording of an Artist                 12
## 4 A movie/show/performance Soundtrack          3
## 5 A true Studio Album from an Artist/Group   458

fct_collapse

data %>%
    mutate(genre = fct_collapse(genre,
                                `Rock & Alternative` = c("Blues/Blues Rock",
                                                         "Blues/Blues ROck",
                                                         "Hard Rock/Metal", 
                                                         "Indie/Alternative Rock", 
                                                         "Punk/Post-Punk/New Wave/Power Pop", 
                                                         "Rock n' Roll/Rhythm & Blues"),
                                `Pop & Soul` = c("Funk/Disco", 
                                                 "Soul/Gospel/R&B", 
                                                 "Singer-Songwriter/Heartland Rock", 
                                                 "Big Band/Jazz"),
                                `Roots & Folk` = c("Country/Folk/Country Rock/Folk Rock", 
                                                   "Blues/Blues Rock", 
                                                   "Singer-Songwriter/Heartland Rock"),
                                `Global Rhythms` = c("Afrobeat", 
                                                     "Latin", 
                                                     "Reggae"),
                                `Hip-Hop & Electronic` = c("Hip-Hop/Rap", 
                                                           "Electronic"))) %>% 
    count(genre)
## # A tibble: 6 × 2
##   genre                    n
##   <fct>                <int>
## 1 Global Rhythms          13
## 2 Pop & Soul              94
## 3 Roots & Folk            83
## 4 Rock & Alternative     132
## 5 Hip-Hop & Electronic    68
## 6 <NA>                   110

fct_lump

data %>%
    mutate(type = fct_lump(type)) %>%
    count(type)
## # A tibble: 2 × 2
##   type       n
##   <fct>  <int>
## 1 Studio   458
## 2 Other     42

Chapter 16

No need to do anything here.