# Read the TidyTuesday 2022-01-25 ratings CSV straight from GitHub.
# `show_col_types = FALSE` silences readr's column-specification message.
rating <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2022/2022-01-25/ratings.csv",
  show_col_types = FALSE
)
rating
## # A tibble: 21,831 × 10
## num id name year rank average bayes_average users_rated url
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 105 30549 Pandemic 2008 106 7.59 7.49 108975 /boa…
## 2 189 822 Carcassonne 2000 190 7.42 7.31 108738 /boa…
## 3 428 13 Catan 1995 429 7.14 6.97 108024 /boa…
## 4 72 68448 7 Wonders 2010 73 7.74 7.63 89982 /boa…
## 5 103 36218 Dominion 2008 104 7.61 7.50 81561 /boa…
## 6 191 9209 Ticket to R… 2004 192 7.41 7.30 76171 /boa…
## 7 100 178900 Codenames 2015 101 7.6 7.51 74419 /boa…
## 8 3 167791 Terraformin… 2016 4 8.42 8.27 74216 /boa…
## 9 15 173346 7 Wonders D… 2015 16 8.11 7.98 69472 /boa…
## 10 35 31260 Agricola 2007 36 7.93 7.81 66093 /boa…
## # ℹ 21,821 more rows
## # ℹ 1 more variable: thumbnail <chr>
# Work with the first 50 rows only, keep three columns, and derive `score`
# as the average rating rounded down to a whole number.
ratings <- rating %>%
  head(50) %>%
  select(name, rank, average) %>%
  mutate(score = floor(average))
ratings
## # A tibble: 50 × 4
## name rank average score
## <chr> <dbl> <dbl> <dbl>
## 1 Pandemic 106 7.59 7
## 2 Carcassonne 190 7.42 7
## 3 Catan 429 7.14 7
## 4 7 Wonders 73 7.74 7
## 5 Dominion 104 7.61 7
## 6 Ticket to Ride 192 7.41 7
## 7 Codenames 101 7.6 7
## 8 Terraforming Mars 4 8.42 8
## 9 7 Wonders Duel 16 8.11 8
## 10 Agricola 36 7.93 7
## # ℹ 40 more rows
# Number of rows for each (still numeric) score value.
count(ratings, score)
## # A tibble: 4 × 2
## score n
## <dbl> <int>
## 1 5 1
## 2 6 1
## 3 7 38
## 4 8 10
# Convert `score` to a factor. The levels follow the order in which the
# values first appear in the data, not their numeric order.
ratings <- mutate(
  ratings,
  score = factor(score, levels = unique(score))
)
ratings
## # A tibble: 50 × 4
## name rank average score
## <chr> <dbl> <dbl> <fct>
## 1 Pandemic 106 7.59 7
## 2 Carcassonne 190 7.42 7
## 3 Catan 429 7.14 7
## 4 7 Wonders 73 7.74 7
## 5 Dominion 104 7.61 7
## 6 Ticket to Ride 192 7.41 7
## 7 Codenames 101 7.6 7
## 8 Terraforming Mars 4 8.42 8
## 9 7 Wonders Duel 16 8.11 8
## 10 Agricola 36 7.93 7
## # ℹ 40 more rows
Make two bar charts here: one before ordering and another after.
# Mean rank for each score level; missing ranks are dropped before averaging.
ratings_summary <- summarise(
  group_by(ratings, score),
  rank = mean(rank, na.rm = TRUE)
)
ratings_summary
## # A tibble: 4 × 2
## score rank
## <fct> <dbl>
## 1 7 223.
## 2 8 13.1
## 3 6 773
## 4 5 4796
# Mean rank against score, with the score levels in their stored order.
ggplot(data = ratings_summary, mapping = aes(x = rank, y = score)) +
  geom_point()
# Same plot, but with the score levels reordered by their mean rank.
ggplot(
  data = ratings_summary,
  mapping = aes(x = rank, y = fct_reorder(score, rank))
) +
  geom_point()
Show examples of three functions:
# Rename the factor levels from digits to descriptive labels
# (new label on the left, existing level on the right), then tally them.
ratings <- mutate(
  ratings,
  score = fct_recode(
    score,
    "Great" = "8",
    "Good" = "7",
    "Okay" = "6",
    "Meh" = "5"
  )
)
count(ratings, score)
## # A tibble: 4 × 2
## score n
## <fct> <int>
## 1 Good 38
## 2 Great 10
## 3 Okay 1
## 4 Meh 1
# Merge the four labels into two broader groups and count each group.
# The result is only printed; `ratings` is not reassigned here.
ratings %>%
  mutate(
    score = fct_collapse(
      score,
      better = c("Great", "Good"),
      worse = c("Okay", "Meh")
    )
  ) %>%
  count(score)
## # A tibble: 2 × 2
## score n
## <fct> <int>
## 1 better 48
## 2 worse 2
# Lump the least frequent labels into a single "Other" level and count the
# result. `fct_lump()` is superseded in forcats; when called without `n` or
# `prop` it dispatches to `fct_lump_lowfreq()`, so that function is called
# directly here. The result is only printed; `ratings` is not reassigned.
ratings %>%
  mutate(score = fct_lump_lowfreq(score)) %>%
  count(score)
## # A tibble: 2 × 2
## score n
## <fct> <int>
## 1 Good 38
## 2 Other 12
No need to do anything here.