# Load the full data set from the Excel workbook.
data <- read_excel("../00_data/myData.xlsx")

# Draw a random 20-row subset and keep only three columns.
# slice_sample() is the current replacement for the superseded sample_n().
# No seed is set, so the rows drawn differ from run to run.
data_small <- data %>%
  slice_sample(n = 20) %>%
  select(clean_name, type, peak_billboard_position)

data_small
## # A tibble: 20 × 3
## clean_name type peak_billboard_position
## <chr> <chr> <dbl>
## 1 Alice Cooper Studio 35
## 2 Television Studio 201
## 3 U2 Studio 1
## 4 The Cure Studio 12
## 5 Elvis Costello Studio 30
## 6 Cheap Trick Studio 73
## 7 Motörhead Studio 201
## 8 The Velvet Underground Studio 199
## 9 Eagles Studio 1
## 10 X Studio 201
## 11 Portishead Studio 79
## 12 Yes Studio 3
## 13 Nirvana Live 1
## 14 The Notorious B.I.G. Studio 1
## 15 Minutemen Studio 201
## 16 The Velvet Underground Studio 201
## 17 Love Studio 154
## 18 The Go-Go's Studio 1
## 19 Arctic Monkeys Studio 6
## 20 Fiona Apple Studio 7
Make two bar charts here: one before ordering and another after.
# Before ordering
# Mean peak_billboard_position for each type in the sample (NAs ignored).
Peak_position_summary <- data_small %>%
  group_by(type) %>%
  summarise(
    peak_billboard_position = mean(peak_billboard_position, na.rm = TRUE)
  )

# Bar chart with the types in their default order.
# geom_col() draws one bar per row, with the bar length taken from the data.
ggplot(Peak_position_summary, aes(peak_billboard_position, type)) +
  geom_col()

# After ordering
# Same bar chart, with the types reordered by their mean peak position.
ggplot(
  Peak_position_summary,
  aes(peak_billboard_position, fct_reorder(type, peak_billboard_position))
) +
  geom_col() +
  # Without this the axis title would be the full fct_reorder() expression.
  labs(y = "type")
Show examples of three functions:
# fct_recode(): rename the "Studio" level to "Studio recorded", then show the
# original and recoded columns side by side for the Studio rows only.
data %>%
  mutate(type_rev = fct_recode(type, `Studio recorded` = "Studio")) %>%
  filter(type == "Studio") %>%
  select(type, type_rev)
## # A tibble: 608 × 2
## type type_rev
## <chr> <fct>
## 1 Studio Studio recorded
## 2 Studio Studio recorded
## 3 Studio Studio recorded
## 4 Studio Studio recorded
## 5 Studio Studio recorded
## 6 Studio Studio recorded
## 7 Studio Studio recorded
## 8 Studio Studio recorded
## 9 Studio Studio recorded
## 10 Studio Studio recorded
## # ℹ 598 more rows
# fct_collapse(): map the "Studio" level to the new level "Studio record".
# The level is listed once; repeating the same level adds nothing.
# Compilation and Live rows are dropped before drawing a random 10-row sample.
# slice_sample() is the current replacement for the superseded sample_n().
data %>%
  mutate(type_col = fct_collapse(type, "Studio record" = "Studio")) %>%
  select(type, type_col) %>%
  filter(type != "Compilation", type != "Live") %>%
  slice_sample(n = 10)
## # A tibble: 10 × 2
## type type_col
## <chr> <fct>
## 1 Studio Studio record
## 2 Studio Studio record
## 3 Studio Studio record
## 4 Studio Studio record
## 5 Studio Studio record
## 6 Studio Studio record
## 7 Studio Studio record
## 8 Studio Studio record
## 9 Studio Studio record
## 10 Greatest Hits Greatest Hits
# Number of rows for each value of type.
count(data, type)
## # A tibble: 5 × 2
## type n
## <chr> <int>
## 1 Compilation 38
## 2 Greatest Hits 23
## 3 Live 19
## 4 Soundtrack 3
## 5 Studio 608
# fct_lump_n(): keep the 2 most frequent levels of type and lump every other
# level into "Other", then list the distinct resulting levels.
# fct_lump_n() is the recommended replacement for the general fct_lump().
data %>%
  mutate(type_lump = fct_lump_n(type, n = 2)) %>%
  distinct(type_lump)
## # A tibble: 3 × 1
## type_lump
## <fct>
## 1 Studio
## 2 Compilation
## 3 Other
No need to do anything here.