Import your data

# Read the TidyTuesday 2022-01-25 ratings.csv straight from GitHub.
# show_col_types = FALSE suppresses readr's column-specification message.
rating <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2022/2022-01-25/ratings.csv",
  show_col_types = FALSE
)
rating
## # A tibble: 21,831 × 10
##      num     id name          year  rank average bayes_average users_rated url  
##    <dbl>  <dbl> <chr>        <dbl> <dbl>   <dbl>         <dbl>       <dbl> <chr>
##  1   105  30549 Pandemic      2008   106    7.59          7.49      108975 /boa…
##  2   189    822 Carcassonne   2000   190    7.42          7.31      108738 /boa…
##  3   428     13 Catan         1995   429    7.14          6.97      108024 /boa…
##  4    72  68448 7 Wonders     2010    73    7.74          7.63       89982 /boa…
##  5   103  36218 Dominion      2008   104    7.61          7.50       81561 /boa…
##  6   191   9209 Ticket to R…  2004   192    7.41          7.30       76171 /boa…
##  7   100 178900 Codenames     2015   101    7.6           7.51       74419 /boa…
##  8     3 167791 Terraformin…  2016     4    8.42          8.27       74216 /boa…
##  9    15 173346 7 Wonders D…  2015    16    8.11          7.98       69472 /boa…
## 10    35  31260 Agricola      2007    36    7.93          7.81       66093 /boa…
## # ℹ 21,821 more rows
## # ℹ 1 more variable: thumbnail <chr>
# Keep the first 50 rows and three columns, then derive `score` as the
# average rating rounded down to a whole number.
ratings <- rating %>%
  head(50) %>%
  select(name, rank, average) %>%
  mutate(score = floor(average))
ratings
## # A tibble: 50 × 4
##    name               rank average score
##    <chr>             <dbl>   <dbl> <dbl>
##  1 Pandemic            106    7.59     7
##  2 Carcassonne         190    7.42     7
##  3 Catan               429    7.14     7
##  4 7 Wonders            73    7.74     7
##  5 Dominion            104    7.61     7
##  6 Ticket to Ride      192    7.41     7
##  7 Codenames           101    7.6      7
##  8 Terraforming Mars     4    8.42     8
##  9 7 Wonders Duel       16    8.11     8
## 10 Agricola             36    7.93     7
## # ℹ 40 more rows

Chapter 15

Create a factor

# Tally how many rows fall under each score value.
count(ratings, score)
## # A tibble: 4 × 2
##   score     n
##   <dbl> <int>
## 1     5     1
## 2     6     1
## 3     7    38
## 4     8    10
# Convert `score` to a factor. unique() returns values in order of first
# appearance, so the levels follow the data order rather than numeric order.
ratings <- mutate(
  ratings,
  score = factor(score, levels = unique(score))
)
ratings
## # A tibble: 50 × 4
##    name               rank average score
##    <chr>             <dbl>   <dbl> <fct>
##  1 Pandemic            106    7.59 7    
##  2 Carcassonne         190    7.42 7    
##  3 Catan               429    7.14 7    
##  4 7 Wonders            73    7.74 7    
##  5 Dominion            104    7.61 7    
##  6 Ticket to Ride      192    7.41 7    
##  7 Codenames           101    7.6  7    
##  8 Terraforming Mars     4    8.42 8    
##  9 7 Wonders Duel       16    8.11 8    
## 10 Agricola             36    7.93 7    
## # ℹ 40 more rows

Modify factor order

Make two bar charts here: one before ordering and another after.

# Mean rank within each score level; missing ranks are ignored.
ratings_summary <- summarise(
  group_by(ratings, score),
  rank = mean(rank, na.rm = TRUE)
)

ratings_summary
## # A tibble: 4 × 2
##   score   rank
##   <fct>  <dbl>
## 1 7      223. 
## 2 8       13.1
## 3 6      773  
## 4 5     4796
# Point plot of mean rank per score, with levels in their existing order.
ratings_summary %>%
  ggplot(aes(x = rank, y = score)) +
  geom_point()

# Same plot, but with the score levels reordered by their mean rank.
ratings_summary %>%
  ggplot(aes(x = rank, y = fct_reorder(score, rank))) +
  geom_point()

Modify factor levels

Show examples of three functions:

  • fct_recode
# Rename the numeric level labels to descriptive ones
# (new label on the left, existing level on the right).
ratings <- mutate(
  ratings,
  score = fct_recode(
    score,
    "Great" = "8",
    "Good" = "7",
    "Okay" = "6",
    "Meh" = "5"
  )
)
count(ratings, score)
## # A tibble: 4 × 2
##   score     n
##   <fct> <int>
## 1 Good     38
## 2 Great    10
## 3 Okay      1
## 4 Meh       1
  • fct_collapse
# Merge the four labels into two broader groups and tally them.
# count() evaluates the collapsed factor as its grouping column `score`.
ratings %>%
  count(
    score = fct_collapse(
      score,
      better = c("Great", "Good"),
      worse = c("Okay", "Meh")
    )
  )
## # A tibble: 2 × 2
##   score      n
##   <fct>  <int>
## 1 better    48
## 2 worse      2
  • fct_lump
# forcats recommends the specific fct_lump_*() functions over the generic
# fct_lump() in new code. With neither `n` nor `prop` supplied, fct_lump()
# dispatches to fct_lump_lowfreq(), so it is called explicitly here: the
# least frequent levels are lumped into "Other" while "Other" stays the
# smallest level.
ratings %>%
  mutate(score = fct_lump_lowfreq(score)) %>%
  count(score)
## # A tibble: 2 × 2
##   score     n
##   <fct> <int>
## 1 Good     38
## 2 Other    12

Chapter 16

No need to do anything here.