# Load the board-game details table from the project data folder.
myData <- read.csv("../00_data/boardgames_details.csv")

# Keep only the columns used below and restrict the rows to four game titles.
data_small <- myData %>%
  select(primary, yearpublished, minplayers) %>%
  filter(primary %in% c("Robin Hood", "Chaos", "Pandemic", "Azul"))

# List the titles that survived the filter. data_small already contains only
# the four selected titles, so no second filter() is needed here.
data_small %>%
  distinct(primary)
## primary
## 1 Pandemic
## 2 Azul
## 3 Robin Hood
## 4 Chaos
Make two bar charts here: one before ordering and another after.

#### Bar chart before ordering
# Per-title summary of data_small: mean publication year, mean minimum
# player count, and the number of rows for each title.
data_before_ordering <- summarise(
  group_by(data_small, primary),
  yearpublished = mean(yearpublished, na.rm = TRUE),
  minplayers = mean(minplayers, na.rm = TRUE),
  n = n()
)

# Plot mean publication year against title; no explicit ordering is applied
# to `primary` here.
ggplot(
  data = data_before_ordering,
  mapping = aes(x = yearpublished, y = primary)
) +
  geom_point()
#### Bar chart after ordering
# Same per-title summary as used for the unordered plot: mean publication
# year, mean minimum player count, and row count for each title.
data_after_ordering <- summarise(
  group_by(data_small, primary),
  yearpublished = mean(yearpublished, na.rm = TRUE),
  minplayers = mean(minplayers, na.rm = TRUE),
  n = n()
)

# Same plot, but the titles on the y axis are reordered by mean publication
# year via fct_reorder().
ggplot(
  data = data_after_ordering,
  mapping = aes(x = yearpublished, y = fct_reorder(primary, yearpublished))
) +
  geom_point()
Show examples of three functions:
# Show each title present in data_small exactly once.
distinct(data_small, primary)
## primary
## 1 Pandemic
## 2 Azul
## 3 Robin Hood
## 4 Chaos
# Recode
# fct_recode(): relabel the level "Robin Hood" as "Hood Robin" in a new
# column, then show original and recoded values side by side for the
# "Robin Hood" rows only.
data_small %>%
  select(primary) %>%
  mutate(primary_rev = fct_recode(primary, "Hood Robin" = "Robin Hood")) %>%
  filter(primary == "Robin Hood")
## primary primary_rev
## 1 Robin Hood Hood Robin
## 2 Robin Hood Hood Robin
## 3 Robin Hood Hood Robin
## 4 Robin Hood Hood Robin
## 5 Robin Hood Hood Robin
## 6 Robin Hood Hood Robin
# Collapse multiple levels into one
# fct_collapse(): merge "Azul", "Pandemic" and "Robin Hood" into one level
# named "Board Game Names", then show original and collapsed values side by
# side for every row except "Chaos".
data_small %>%
  select(primary) %>%
  mutate(
    primary_col = fct_collapse(
      primary,
      "Board Game Names" = c("Azul", "Pandemic", "Robin Hood")
    )
  ) %>%
  filter(primary != "Chaos")
## primary primary_col
## 1 Pandemic Board Game Names
## 2 Azul Board Game Names
## 3 Robin Hood Board Game Names
## 4 Robin Hood Board Game Names
## 5 Robin Hood Board Game Names
## 6 Robin Hood Board Game Names
## 7 Robin Hood Board Game Names
## 8 Robin Hood Board Game Names
# Lump small levels into other levels
# Row count per title, to see which levels are small before lumping.
count(data_small, primary)
## primary n
## 1 Azul 1
## 2 Chaos 4
## 3 Pandemic 1
## 4 Robin Hood 6
# Lump the least frequent titles into "Other". fct_lump() with neither `n`
# nor `prop` dispatches to fct_lump_lowfreq(), so call it directly: it lumps
# the rarest levels while keeping "Other" the smallest level.
data_small %>%
  mutate(primary_lump = fct_lump_lowfreq(primary)) %>%
  distinct(primary_lump)
## primary_lump
## 1 Other
## 2 Robin Hood
## 3 Chaos
No need to do anything here.