# Load the data set from the Excel workbook.
data <- read_xlsx("../00_data/data/myData.xlsx")

# Tally the number of rows for each value of `type`.
count(data, type)
## # A tibble: 5 × 2
## type n
## <chr> <int>
## 1 Compilation 15
## 2 Greatest Hits 12
## 3 Live 12
## 4 Soundtrack 3
## 5 Studio 458
# Hand-picked display order for the values of `type`.
type_levels <- c(
  "Studio",
  "Compilation",
  "Greatest Hits",
  "Live",
  "Soundtrack"
)

# Turn `type` into a factor whose levels follow `type_levels`, then recount so
# the rows print in that order.
data_rev <- mutate(data, type = factor(type, levels = type_levels))
count(data_rev, type)
## # A tibble: 5 × 2
## type n
## <fct> <int>
## 1 Studio 458
## 2 Compilation 15
## 3 Greatest Hits 12
## 4 Live 12
## 5 Soundtrack 3
Make two bar charts here: one before ordering and another after.
# Mean peak Billboard position for each `type`, ignoring missing values.
data_summary <- summarise(
  group_by(data, type),
  peak_billboard_position = mean(peak_billboard_position, na.rm = TRUE)
)

data_summary
## # A tibble: 5 × 2
## type peak_billboard_position
## <chr> <dbl>
## 1 Compilation 179
## 2 Greatest Hits 118.
## 3 Live 43.2
## 4 Soundtrack 47.3
## 5 Studio 50.3
# NOTE(review): the prompt above asks for bar charts, but geom_point() draws
# dot plots -- confirm which is intended.

# Before ordering: `type` is plotted in its default order.
ggplot(data_summary, aes(x = peak_billboard_position, y = type)) +
  geom_point()

# After ordering: `type` is reordered by its mean peak position.
ggplot(
  data_summary,
  aes(
    x = peak_billboard_position,
    y = fct_reorder(type, peak_billboard_position)
  )
) +
  geom_point()
Show examples of three functions:
# fct_recode(): give each value of `type` a longer, descriptive label
# (new label on the left, existing value on the right), then count per label.
data %>%
  mutate(
    type = fct_recode(
      type,
      "Compilation of Artists Songs"             = "Compilation",
      "Greatest Songs of an Era"                 = "Greatest Hits",
      "Live Recording of an Artist"              = "Live",
      "A movie/show/performance Soundtrack"      = "Soundtrack",
      "A true Studio Album from an Artist/Group" = "Studio"
    )
  ) %>%
  count(type)
## # A tibble: 5 × 2
## type n
## <fct> <int>
## 1 Compilation of Artists Songs 15
## 2 Greatest Songs of an Era 12
## 3 Live Recording of an Artist 12
## 4 A movie/show/performance Soundtrack 3
## 5 A true Studio Album from an Artist/Group 458
# fct_collapse(): fold the detailed genres into five broad families and count
# the rows in each.
#
# Each original genre is listed under exactly one family. A genre listed under
# two families ends up only in the last one it was given, so the earlier,
# overridden listings of "Blues/Blues Rock" (under Rock & Alternative) and
# "Singer-Songwriter/Heartland Rock" (under Pop & Soul) have been removed.
# The effective mapping, and therefore the counts, are unchanged.
#
# NOTE(review): the misspelled level "Blues/Blues ROck" is still grouped under
# Rock & Alternative while "Blues/Blues Rock" sits in Roots & Folk -- confirm
# whether both spellings should share one family.
data %>%
  mutate(
    genre = fct_collapse(
      genre,
      `Rock & Alternative` = c(
        "Blues/Blues ROck",
        "Hard Rock/Metal",
        "Indie/Alternative Rock",
        "Punk/Post-Punk/New Wave/Power Pop",
        "Rock n' Roll/Rhythm & Blues"
      ),
      `Pop & Soul` = c(
        "Funk/Disco",
        "Soul/Gospel/R&B",
        "Big Band/Jazz"
      ),
      `Roots & Folk` = c(
        "Country/Folk/Country Rock/Folk Rock",
        "Blues/Blues Rock",
        "Singer-Songwriter/Heartland Rock"
      ),
      `Global Rhythms` = c(
        "Afrobeat",
        "Latin",
        "Reggae"
      ),
      `Hip-Hop & Electronic` = c(
        "Hip-Hop/Rap",
        "Electronic"
      )
    )
  ) %>%
  count(genre)
## # A tibble: 6 × 2
## genre n
## <fct> <int>
## 1 Global Rhythms 13
## 2 Pop & Soul 94
## 3 Roots & Folk 83
## 4 Rock & Alternative 132
## 5 Hip-Hop & Electronic 68
## 6 <NA> 110
# fct_lump(): merge the less frequent values of `type` into a single "Other"
# level, then count the rows per resulting level.
count(
  mutate(data, type = fct_lump(type)),
  type
)
## # A tibble: 2 × 2
## type n
## <fct> <int>
## 1 Studio 458
## 2 Other 42
No need to do anything here.