Import your data

# Read the board game details, then keep a small working subset:
# the four titles studied below and the three columns they need.
myData <- read.csv("../00_data/boardgames_details.csv")

data_small <- myData %>%
    filter(primary %in% c("Robin Hood", "Chaos", "Pandemic", "Azul")) %>%
    select(primary, yearpublished, minplayers)

Chapter 15

Create a factor

# data_small was already restricted to these four titles when it was built,
# so no second filter is needed. Convert the title column to a factor so this
# section really does create one.
data_small %>%
    distinct(primary) %>%
    mutate(primary = factor(primary))
##      primary
## 1   Pandemic
## 2       Azul
## 3 Robin Hood
## 4      Chaos

Modify factor order

Make two bar charts here - one before ordering, another after.

#### Bar chart before ordering

# Per-title summary: mean publication year, mean minimum player count,
# and the number of rows for each title.
data_before_ordering <- data_small %>%
    group_by(primary) %>%
    summarise(
        across(c(yearpublished, minplayers), ~ mean(.x, na.rm = TRUE)),
        n = n()
    )

# Plot the titles as they are, with no reordering applied to primary.
ggplot(
    data = data_before_ordering,
    mapping = aes(x = yearpublished, y = primary)
) +
    geom_point()

#### Bar chart after ordering

# Per-title summary (mean publication year, mean minimum player count, row
# count), with primary reordered by mean publication year.
# The reordering is stored in the data rather than written inside aes(), so
# the y-axis is titled "primary" instead of the raw fct_reorder() expression,
# and data_after_ordering actually holds the ordered factor its name promises.
data_after_ordering <- data_small %>%
    group_by(primary) %>%
    summarise(
        yearpublished = mean(yearpublished, na.rm = TRUE),
        minplayers = mean(minplayers, na.rm = TRUE),
        n = n()
    ) %>%
    mutate(primary = fct_reorder(primary, yearpublished))

ggplot(data_after_ordering, aes(yearpublished, primary)) +
    geom_point()

Modify factor levels

Show examples of three functions:

  • fct_recode
  • fct_collapse
  • fct_lump

Modify factor levels fct_recode

# List the titles present before any level is renamed.
distinct(data_small, primary)
##      primary
## 1   Pandemic
## 2       Azul
## 3 Robin Hood
## 4      Chaos
# Recode: rename the "Robin Hood" level to "Hood Robin", then show only the
# affected rows with the old and new values side by side.
data_small %>%
    select(primary) %>%
    mutate(
        primary_rev = fct_recode(primary, "Hood Robin" = "Robin Hood")
    ) %>%
    filter(primary == "Robin Hood")
##      primary primary_rev
## 1 Robin Hood  Hood Robin
## 2 Robin Hood  Hood Robin
## 3 Robin Hood  Hood Robin
## 4 Robin Hood  Hood Robin
## 5 Robin Hood  Hood Robin
## 6 Robin Hood  Hood Robin

Modify factor levels fct_collapse

# Collapse: merge three titles into one combined level, then show only the
# rows that were merged (everything except "Chaos").
data_small %>%
    select(primary) %>%
    mutate(
        primary_col = fct_collapse(
            primary,
            "Board Game Names" = c("Azul", "Pandemic", "Robin Hood")
        )
    ) %>%
    filter(primary != "Chaos")
##      primary      primary_col
## 1   Pandemic Board Game Names
## 2       Azul Board Game Names
## 3 Robin Hood Board Game Names
## 4 Robin Hood Board Game Names
## 5 Robin Hood Board Game Names
## 6 Robin Hood Board Game Names
## 7 Robin Hood Board Game Names
## 8 Robin Hood Board Game Names

Modify factor levels fct_lump

# Lump: first check how many rows each title has.
count(data_small, primary)
##      primary n
## 1       Azul 1
## 2      Chaos 4
## 3   Pandemic 1
## 4 Robin Hood 6
# Lump the titles with fct_lump() and list the levels that remain.
data_small %>%
    mutate(primary_lump = fct_lump(primary)) %>%
    distinct(primary_lump)
##   primary_lump
## 1        Other
## 2   Robin Hood
## 3        Chaos

Chapter 16

No need to do anything here.