# Load the Tidy Tuesday 2022-09-06 LEGO tables -------------------------------
#
# Every table is read from the same directory of the tidytuesday repository.
# Column types are declared explicitly, using the types readr reported when it
# guessed them in the recorded output of an earlier run, so parsing does not
# depend on type guessing and no column-specification message is printed.
# The row/column counts in the comments below come from that recorded output.
tt_base_url <- paste0(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
  "master/data/2022/2022-09-06/"
)

# inventories: 33864 rows x 3 columns (set_num <chr>; id, version <dbl>).
inventories <- readr::read_csv(
  paste0(tt_base_url, "inventories.csv.gz"),
  col_types = readr::cols(
    set_num = readr::col_character(),
    id = readr::col_double(),
    version = readr::col_double()
  )
)

# inventory_sets: 3942 rows x 3 columns
# (set_num <chr>; inventory_id, quantity <dbl>).
inventory_sets <- readr::read_csv(
  paste0(tt_base_url, "inventory_sets.csv.gz"),
  col_types = readr::cols(
    set_num = readr::col_character(),
    inventory_id = readr::col_double(),
    quantity = readr::col_double()
  )
)

# sets: 19798 rows x 6 columns
# (set_num, name, img_url <chr>; year, theme_id, num_parts <dbl>).
sets <- readr::read_csv(
  paste0(tt_base_url, "sets.csv.gz"),
  col_types = readr::cols(
    set_num = readr::col_character(),
    name = readr::col_character(),
    img_url = readr::col_character(),
    year = readr::col_double(),
    theme_id = readr::col_double(),
    num_parts = readr::col_double()
  )
)

# minifigs: 12550 rows x 4 columns
# (fig_num, name, img_url <chr>; num_parts <dbl>).
minifigs <- readr::read_csv(
  paste0(tt_base_url, "minifigs.csv.gz"),
  col_types = readr::cols(
    fig_num = readr::col_character(),
    name = readr::col_character(),
    img_url = readr::col_character(),
    num_parts = readr::col_double()
  )
)

# inventory_minifigs: 18999 rows x 3 columns
# (fig_num <chr>; inventory_id, quantity <dbl>).
inventory_minifigs <- readr::read_csv(
  paste0(tt_base_url, "inventory_minifigs.csv.gz"),
  col_types = readr::cols(
    fig_num = readr::col_character(),
    inventory_id = readr::col_double(),
    quantity = readr::col_double()
  )
)
# Build one wide table starting from `minifigs`:
#   1. attach `inventory_minifigs` rows on `fig_num`,
#   2. attach `inventories` rows where `inventory_id` equals their `id`,
#   3. attach `sets` rows on `set_num`.
# `minifigs` and `sets` share the column names `name`, `num_parts` and
# `img_url`, so the last join suffixes them with `.x` (minifig side) and `.y`
# (set side). The `.x` columns are then given descriptive names; the `.y`
# columns keep their suffix and are referenced under those names later on.
df <- minifigs %>%
  left_join(inventory_minifigs, by = "fig_num") %>%
  left_join(inventories, by = c(inventory_id = "id")) %>%
  left_join(sets, by = "set_num") %>%
  rename(
    minifig_name = name.x,
    minifig_img_url = img_url.x,
    minifig_parts = num_parts.x
  )
# Slimmed-down copy of `df`: drop every column whose name contains "url",
# "id" or "_num", then give the set-side (`.y`) columns descriptive names.
df_new <- df %>%
  select(-contains(c("url", "id", "_num"))) %>%
  rename(
    set_parts = num_parts.y,
    set_name = name.y
  )

# Tokenise the minifigure names into one row per token.
# NOTE(review): the output column is spelled `minfig_words` (sic), and `df2`
# is not referenced again in the visible part of this script - confirm
# whether it is still needed before renaming or removing it.
df2 <- unnest_tokens(df_new, output = minfig_words, input = minifig_name)
# Lower-case the minifigure names so the lower-case keyword patterns below can
# match them. The column is addressed by name rather than by position so this
# step does not silently hit a different column if the column order changes.
df[["minifig_name"]] <- tolower(df[["minifig_name"]])

# Keyword patterns used to guess a gender from the minifigure name.
# The honorifics ("ms.", "mrs.", "mr.") have their dot escaped and are anchored
# at a word boundary: an unescaped "." matches any character, so a bare "ms."
# would also match inside words such as "arms " and label those rows "female".
# All other keywords are deliberately matched as plain substrings.
female_pattern <- paste(
  c(
    "woman", "girl", "female", "queen", "ponytail", "emma", "olivia",
    "andrea", "mia", "stephanie", "\\bms\\.", "\\bmrs\\.", "mother",
    "goddess", "daughter", "lady"
  ),
  collapse = "|"
)
male_pattern <- paste(
  c(
    "man", "boy", "sir", "male", "guy", "king", "beard", "stubble", "knight",
    "lord", "luke", "harry", "hagrid", "darth", "\\bmr\\.", "father", "god",
    "son", "soldier", "moustache", "santa", "lloyd", "droid", "jones",
    "steve", "john", "alex", "owen", "trooper"
  ),
  collapse = "|"
)

# Label each row and keep only the rows that received a gender.
# The female pattern is tested first because several male keywords are
# substrings of female ones ("man" in "woman", "god" in "goddess", "male" in
# "female"); `case_when()` uses the first condition that is TRUE.
# `case_when()` and `str_detect()` are vectorised over rows, so no grouping is
# needed here and the result is returned ungrouped.
gender_df <- df %>%
  mutate(
    gender = case_when(
      str_detect(minifig_name, female_pattern) ~ "female",
      str_detect(minifig_name, male_pattern) ~ "male",
      TRUE ~ "unknown"
    )
  ) %>%
  filter(gender != "unknown")
# Average set size per gender and year ---------------------------------------
#
# Rows without a set-side part count (`num_parts.y`) are dropped first. The
# data are then collapsed to ONE row per (gender, year) holding the number of
# contributing rows and their mean part count. Using `summarise()` instead of
# `mutate()` + `select()` avoids carrying one identical row per record, which
# would stack many semi-transparent points on top of each other in the plot.
# The result is ungrouped and keeps the column order
# avg_lego_pieces, gender, year, observations.
df_final <- gender_df %>%
  drop_na(num_parts.y) %>%
  group_by(gender, year) %>%
  summarise(
    observations = n(),
    avg_lego_pieces = mean(num_parts.y),
    .groups = "drop"
  ) %>%
  select(avg_lego_pieces, gender, year, observations) %>%
  arrange(year)

# Plot: average pieces per set over time, by inferred gender -----------------
#
# Point size encodes how many rows fed each average. The linear trend is
# weighted by `observations`, which yields the same fitted line as fitting on
# one row per underlying record while the data stay aggregated.
# Colours are named so the mapping does not depend on the order of the levels.
# Values outside the axis limits below are dropped by ggplot2 before the fit.
df_final %>%
  ggplot(aes(x = year, y = avg_lego_pieces, color = gender)) +
  geom_point(aes(size = observations), shape = 15, alpha = .5) +
  geom_smooth(aes(weight = observations), method = "lm", se = FALSE) +
  scale_color_manual(values = c(female = "steelblue2", male = "#E18D0A")) +
  scale_x_continuous(
    limits = c(1975, 2022),
    breaks = seq(1975, 2022, by = 4)
  ) +
  scale_y_continuous(
    limits = c(45, 1030),
    breaks = seq(45, 1030, by = 100)
  ) +
  labs(
    title = "How are Minifigure Gender and Number of Pieces Related?",
    subtitle = "The Relationship between Minifigure Gender and the Number of Pieces over Time \n",
    caption = "Tidy Tuesday 09-06-2022 | Github: @scolando",
    x = "Year",
    y = "Average Pieces per Set"
  ) +
  guides(
    color = guide_legend(title.position = "top"),
    size = guide_legend(title.position = "top")
  ) +
  theme_bw() +
  theme(
    panel.background = element_rect(fill = "ivory"),
    plot.background = element_rect(fill = "aliceblue"),
    legend.position = "right",
    legend.justification = "left",
    legend.background = element_rect(fill = "aliceblue"),
    legend.key = element_rect(fill = "aliceblue"),
    plot.title = element_text(
      face = "bold", color = "orangered4", hjust = 0.5, size = 15
    ),
    plot.subtitle = element_text(
      face = "bold.italic", color = "orangered3", hjust = 0.5, size = 10
    )
  )
