# Packages ----
# Each library() call needs its own line; two calls on one line do not parse.
# tidyverse already attaches ggplot2 and dplyr; the explicit calls are kept so
# the script's dependencies stay visible.
library(tidyverse)
library(ggplot2)
library(dplyr)

# Data ----
# Both tables are read directly from the TidyTuesday GitHub repository
# (2022-01-25 release), so running this section needs network access.
ratings <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-01-25/ratings.csv"
)
details <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-01-25/details.csv"
)
# Ratings Table ----
# Column names of the ratings table; the recorded output follows as comments.
names(ratings)
## [1] "num" "id" "name" "year"
## [5] "rank" "average" "bayes_average" "users_rated"
## [9] "url" "thumbnail"
# Details Table ----
# Column names of the details table; the recorded output follows as comments.
names(details)
## [1] "num" "id"
## [3] "primary" "description"
## [5] "yearpublished" "minplayers"
## [7] "maxplayers" "playingtime"
## [9] "minplaytime" "maxplaytime"
## [11] "minage" "boardgamecategory"
## [13] "boardgamemechanic" "boardgamefamily"
## [15] "boardgameexpansion" "boardgameimplementation"
## [17] "boardgamedesigner" "boardgameartist"
## [19] "boardgamepublisher" "owned"
## [21] "trading" "wanting"
## [23] "wishing"
# I used the `id` column as the key to join the two data sets.
# Join ratings to details on the shared game id. Both tables also carry a
# `num` column, so the joined table holds suffixed copies (`num.x`, `num.y`).
join_rating_details <- ratings %>%
  inner_join(details, by = "id")
glimpse(join_rating_details)

# Average maximum play time for each maximum-player count.
# The filters run after the summary, so they drop whole groups: player counts
# of 0 or above 10, and groups whose average play time exceeds 360.
bar_chart_data <- join_rating_details %>%
  group_by(maxplayers) %>%
  summarise(avg_max_time = mean(maxplaytime)) %>%
  filter(
    maxplayers != 0,
    maxplayers <= 10,
    avg_max_time <= 360
  )

# Bar chart: one bar per player count, with a tick on every integer 0-10.
ggplot(data = bar_chart_data) +
  geom_col(mapping = aes(maxplayers, avg_max_time), fill = "darkorchid3") +
  labs(
    x = "Number of Players",
    y = "Playing Time",
    title = "HOW LONG WILL A GAME TAKE BASED ON THE NUMBER OF PLAYERS?",
    subtitle = "Two, Three and Seven Player Games Typically Take Longer to Play",
    caption = "Created by Nick Guendel with data from ***"
  ) +
  scale_x_continuous(breaks = 0:10)

# Modern games have the highest average rating. The data shows that users, on
# average, rate newly published games higher than older games. One interesting
# factor is that newly published games have the advantage of being rated by
# users at the time of publication rather than decades after they were
# published.
# Restrict the joined table to games published from 1950 onward.
df_year_vs_rating <- join_rating_details %>%
  filter(yearpublished >= 1950)

# Scatter of average rating by publication year with a GAM trend line.
# `alpha` is a fixed transparency, so it is set outside aes(); inside aes() it
# would be treated as a data mapping rather than a constant.
# The colour mapping on the smoother only labels the line; its legend is
# hidden by theme(legend.position = "none").
ggplot(
  data = df_year_vs_rating,
  mapping = aes(x = yearpublished, y = average)
) +
  geom_jitter(alpha = 0.25) +
  geom_smooth(
    mapping = aes(color = "User Average"),
    method = "gam",
    formula = y ~ s(x, bs = "cs")
  ) +
  labs(
    x = "Year Published",
    y = "Average Rating",
    title = "BBG Current Rating of Games Based on the Year Published",
    subtitle = "BBG ratings are highest for the most recently published games",
    caption = "Created by N. Guendel with data from ***"
  ) +
  theme(legend.position = "none")

# Selecting Data and Cleaning data
# Keep only the columns needed for the designer analysis.
board_games <- join_rating_details %>%
  select(
    id, name, yearpublished, boardgamecategory, boardgamedesigner,
    average, bayes_average
  )

# Strip the list punctuation ([ ] ' ( ) ") from the designer strings and store
# the result in a `designer` column that replaces the raw column.
# mutate() is used instead of bind_cols(..., .id = "id"): `.id` is a
# bind_rows() argument, so bind_cols() received it through `...` as extra
# input rather than as an option.
board_game_designer <- board_games %>%
  mutate(
    designer = gsub(
      "\\[|\\]|\\'|\\(|\\)|\"", "",
      as.character(boardgamedesigner)
    )
  ) %>%
  select(-boardgamedesigner)

# Split the comma-separated designer lists so each row holds one designer.
board_game_designer <- separate_rows(board_game_designer, designer, sep = ", ")

# Number of games credited to each designer, largest count first.
num_games_of_designer <- board_game_designer %>%
  count(designer, sort = TRUE)

# Mean user rating across all games credited to each designer.
board_game_designer_avg <- board_game_designer %>%
  group_by(designer) %>%
  summarise(designer_avg = mean(average))

# One row per designer with both the mean rating and the game count.
board_game_designer <- board_game_designer_avg %>%
  inner_join(num_games_of_designer, by = "designer")

# Keep designers with more than four games, then keep those whose mean rating
# is more than two standard deviations above the mean of that remaining group.
# The order matters: mean() and sd() are computed after the n > 4 filter.
board_game_designer_avg_filtered <- board_game_designer %>%
  filter(n > 4) %>%
  filter(designer_avg > mean(designer_avg) + 2 * sd(designer_avg))

# Plotting designer averages
ggplot(data = board_game_designer_avg_filtered) +
  geom_col(
    aes(x = reorder(designer, designer_avg), y = designer_avg),
    fill = "brown2"
  ) +
  coord_flip() +
  labs(
    x = "Game Designer",
    y = "User Rating",
    title = "Rankings of Top Game Designers"
  ) +
  theme_minimal()

# Columns needed for the category analysis.
board_game_category <- join_rating_details %>%
  select(name, boardgamecategory, average)
# Strip the list punctuation ([ ] ' ( ) ") from the category strings and store
# the result as `category`, replacing the raw column. The table then holds
# `name`, `average` and `category`.
board_game_category <- board_game_category %>%
  mutate(
    category = gsub(
      "\\[|\\]|\\'|\\(|\\)|\"", "",
      as.character(boardgamecategory)
    )
  ) %>%
  select(-boardgamecategory)

# Split the comma-separated category lists so each row holds one category.
board_game_category <- separate_rows(board_game_category, category, sep = ", ")

# Number of games in each category, largest count first.
num_games_per_category <- board_game_category %>%
  count(category, sort = TRUE)

# Mean user rating per category.
board_game_category <- board_game_category %>%
  group_by(category) %>%
  summarise(avg = mean(average))

# One row per category with its game count `n` and mean rating `avg`.
board_game_category <- num_games_per_category %>%
  inner_join(board_game_category, by = "category")

# Rows to plot. Missing categories are removed with an explicit is.na() test
# instead of relying on `category != "NA"` evaluating to NA; a literal "NA"
# label and the "Fan Expansion" category are dropped as before.
# The order matters: mean(avg) is taken over the rows left after that first
# filter, and the n > 500 cut is applied last.
graph_filter <- board_game_category %>%
  filter(
    !is.na(category),
    !category %in% c("NA", "Fan Expansion")
  ) %>%
  filter(avg > mean(avg)) %>%
  filter(n > 500)

# Horizontal bar chart of the remaining categories, ordered by mean rating.
ggplot(data = graph_filter) +
  geom_col(aes(x = reorder(category, avg), y = avg), fill = "deepskyblue4") +
  coord_flip() +
  labs(
    x = "Category",
    y = "User Rating",
    title = "Highest Rated Games By Category"
  ) +
  theme_minimal()