# Read the CSV; the path is resolved relative to the working directory.
data <- read.csv(file = "../00_data/myData.csv")

# Tabulate the number of rows for each value of `type`.
count(data, type)
## type n
## 1 Compilation 38
## 2 Greatest Hits 23
## 3 Live 19
## 4 Soundtrack 3
## 5 Studio 608
# Desired level order for `type`, from most to least frequent.
# The strings must match the values in the data exactly (case and spelling):
# factor() turns any value that is not listed in `levels` into NA.
type_levels <- c(
  "Studio",
  "Compilation",
  "Greatest Hits",
  "Live",
  "Soundtrack"
)

# Copy of `data` with `type` converted to a factor using the order above.
data_rev <- data %>%
  mutate(type = factor(type, levels = type_levels))
Make two bar charts here: one before ordering and another after.
# One row per `type`, holding the mean of `ave_age_at_top_500` for that type.
# Missing values are dropped before averaging (na.rm = TRUE).
data_summary <- summarise(
  group_by(data, type),
  ave_age_at_top_500 = mean(ave_age_at_top_500, na.rm = TRUE)
)

# Show the summary table.
data_summary
## # A tibble: 5 × 2
## type ave_age_at_top_500
## <chr> <dbl>
## 1 Compilation 48.2
## 2 Greatest Hits 46.3
## 3 Live 32.5
## 4 Soundtrack 29
## 5 Studio 27.9
# Before ordering: `type` is plotted as it comes out of the summary table.
ggplot(
  data = data_summary,
  mapping = aes(x = ave_age_at_top_500, y = type)
) +
  geom_point()

# After ordering: the levels of `type` are reordered by `ave_age_at_top_500`
# via fct_reorder(), so the points are sorted along the y axis.
ggplot(
  data = data_summary,
  mapping = aes(
    x = ave_age_at_top_500,
    y = fct_reorder(type, ave_age_at_top_500)
  )
) +
  geom_point()
Show examples of three functions:
# fct_recode(): relabel levels using new = old pairs.
# Every level is mapped to its own name here, so the labels and the counts
# come out the same as in the raw data.
data %>%
  mutate(
    type = type %>% fct_recode(
      "Compilation" = "Compilation",
      "Greatest Hits" = "Greatest Hits",
      "Live" = "Live",
      "Soundtrack" = "Soundtrack",
      "Studio" = "Studio"
    )
  ) %>%
  count(type)
## type n
## 1 Compilation 38
## 2 Greatest Hits 23
## 3 Live 19
## 4 Soundtrack 3
## 5 Studio 608
# fct_collapse(): keep "Compilation" as its own level and merge the four
# remaining types into a single "other" level, then count each group.
data %>%
  mutate(
    type = type %>% fct_collapse(
      Compilation = "Compilation",
      other = c("Greatest Hits", "Live", "Soundtrack", "Studio")
    )
  ) %>%
  count(type)
## type n
## 1 Compilation 38
## 2 other 653
# fct_lump(): called with its default arguments. In the recorded output
# everything except "Studio" ends up in a single "Other" level.
data %>%
  mutate(type = type %>% fct_lump()) %>%
  count(type)
## type n
## 1 Studio 608
## 2 Other 83
No need to do anything here.