## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'openintro' was built under R version 4.4.3
I was looking for a simple dataset with count data on chess players' transfers to the U.S.
library(readr)
# Read the chess transfer records directly from the raw GitHub URL
chess_df <- read_csv(
  file = "https://raw.githubusercontent.com/tanzil64/Tanzil_Tidyverse/refs/heads/main/chess_transfers.csv"
)
## Rows: 932 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): url, Federation, Form.Fed, Transfer Date
## dbl (1): ID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## spc_tbl_ [932 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ url : chr [1:932] "https://ratings.fide.com/fedchange.phtml?year=2000" "https://ratings.fide.com/fedchange.phtml?year=2000" "https://ratings.fide.com/fedchange.phtml?year=2000" "https://ratings.fide.com/fedchange.phtml?year=2000" ...
## $ ID : num [1:932] 2019221 14401754 14401762 2019221 14401754 ...
## $ Federation : chr [1:932] "USA" "BIH" "BIH" "USA" ...
## $ Form.Fed : chr [1:932] "PHI" "CRO" "YUG" "PHI" ...
## $ Transfer Date: chr [1:932] "12/15/00" "1/31/00" "1/31/00" "12/15/00" ...
## - attr(*, "spec")=
## .. cols(
## .. url = col_character(),
## .. ID = col_double(),
## .. Federation = col_character(),
## .. Form.Fed = col_character(),
## .. `Transfer Date` = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Standardise the column names to snake_case (e.g. `Form.Fed` -> `form_fed`)
chess_df <- janitor::clean_names(chess_df)
# Show the cleaned column names
colnames(chess_df)
## [1] "url" "id" "federation" "form_fed"
## [5] "transfer_date"
# Parse the transfer date strings, then derive the calendar year of each
# transfer from the parsed value.
# NOTE(review): the sample values shown (e.g. "12/15/00") look like
# month/day/year; the "ymd" and "dmy" orders may be unnecessary -- confirm
# against the full data before narrowing.
chess_df <- mutate(
  chess_df,
  parsed_date = parse_date_time(
    transfer_date,
    orders = c("ymd", "dmy", "mdy")
  ),
  transfer_year = year(parsed_date)
)
# Tally the number of transfer records in each year (one row per year,
# count stored in column `n`).
# NOTE(review): this tally is taken before the NA filter on `form_fed` that
# follows, so it includes rows that filter later removes -- confirm intended.
transfers_per_year <- count(chess_df, transfer_year)
# Keep only the rows that have a recorded former federation.
# The previous predicate tested `!is.na(form_fed)` twice; the redundant copy
# is dropped here, which does not change which rows are kept.
# NOTE(review): the second test may have been meant for `federation` --
# confirm with the author before adding it, since that could drop more rows.
chess_df <- chess_df %>%
  filter(!is.na(form_fed))
## tibble [923 × 7] (S3: tbl_df/tbl/data.frame)
## $ url : chr [1:923] "https://ratings.fide.com/fedchange.phtml?year=2000" "https://ratings.fide.com/fedchange.phtml?year=2000" "https://ratings.fide.com/fedchange.phtml?year=2000" "https://ratings.fide.com/fedchange.phtml?year=2000" ...
## $ id : num [1:923] 2019221 14401754 14401762 2019221 14401754 ...
## $ federation : chr [1:923] "USA" "BIH" "BIH" "USA" ...
## $ form_fed : chr [1:923] "PHI" "CRO" "YUG" "PHI" ...
## $ transfer_date: chr [1:923] "12/15/00" "1/31/00" "1/31/00" "12/15/00" ...
## $ parsed_date : POSIXct[1:923], format: "2000-12-15" "2000-01-31" ...
## $ transfer_year: num [1:923] 2000 2000 2000 2000 2000 ...
# Line chart of the yearly transfer counts, with a point marking each year.
# `linewidth` sets the line thickness; using `size` for lines was deprecated
# in ggplot2 3.4.0 and triggered a lifecycle warning. `size` is still the
# correct argument for the point layer.
ggplot(transfers_per_year, aes(x = transfer_year, y = n)) +
  geom_line(color = "blue", linewidth = 1) +
  geom_point(color = "darkred", size = 2) +
  labs(
    title = "Chess Player Transfers Over Time",
    x = "Year",
    y = "Number of Transfers"
  ) +
  theme_minimal()
# Tally transfers by former federation (`form_fed`), most frequent first,
# then keep the rows with the ten largest counts.
# NOTE(review): `slice_max()` keeps ties by default, so more than ten rows
# can be returned when counts are tied at the cut-off.
top_federations <- chess_df %>%
  count(form_fed, sort = TRUE) %>%
  slice_max(order_by = n, n = 10)
# Horizontal bar chart of the federations in `top_federations`, ordered by
# their transfer counts. `geom_col()` draws bars whose heights are the values
# in `n`; it replaces `geom_bar(stat = "identity")` and matches the other bar
# charts in this file.
ggplot(top_federations, aes(x = reorder(form_fed, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Federations by Transfers Out",
    x = "From Federation",
    y = "Number of Transfers"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.text = element_text(size = 12)
  )
## Sorting by Number of Transfers
# Horizontal bar chart of transfer counts for every former federation,
# with bars ordered by count.
# The axis shows `form_fed`, which the rest of this file treats as the
# federation a player transferred *from*, so it is labelled "Former
# Federation" rather than "Destination Federation".
chess_df %>%
  count(form_fed) %>%
  ggplot(aes(x = fct_reorder(form_fed, n), y = n)) +
  geom_col(fill = "lightblue") +
  labs(
    x = "Former Federation",
    y = "Number of Transfers",
    caption = "Data: FIDE Federation Transfers"
  ) +
  coord_flip()
## Grouping Less Frequent Federations
# Names of the five former federations with the most transfer records,
# as a character vector ordered from most to least frequent.
top_feds <- chess_df %>%
  count(form_fed, sort = TRUE) %>%
  slice_head(n = 5) %>%
  pull(form_fed)
# Collapse every former federation that is not in `top_feds` into a single
# "Other Federations" level and total the transfers per level. Both charts
# below use this table, so it is computed once.
lumped_counts <- chess_df %>%
  count(form_fed) %>%
  mutate(
    form_fed = fct_other(
      form_fed,
      keep = top_feds,
      other_level = "Other Federations"
    )
  ) %>%
  group_by(form_fed) %>%
  summarise(n = sum(n))

# First and last transfer year, ignoring rows whose date failed to parse,
# so the subtitle never shows NA.
year_span <- range(chess_df$transfer_year, na.rm = TRUE)

# Basic version of the chart: lumped federations ordered by count.
# The axis shows `form_fed` (the federation transferred from), hence the
# "Former Federation" label.
lumped_counts %>%
  ggplot(aes(x = fct_reorder(form_fed, n), y = n)) +
  geom_col(fill = "lightblue") +
  labs(
    x = "Former Federation",
    y = "Number of Transfers",
    caption = "Data: FIDE Federation Transfers"
  ) +
  coord_flip()

# Same chart with a title, a subtitle giving the year span, and styled
# title text.
lumped_counts %>%
  ggplot(aes(x = fct_reorder(form_fed, n), y = n)) +
  geom_col(fill = "lightblue") +
  labs(
    x = "Former Federation",
    y = "Number of Transfers",
    title = "Top Chess Federation Transfer Origins",
    subtitle = paste0(year_span[1], "-", year_span[2]),
    caption = "Data: FIDE Federation Transfers"
  ) +
  theme(
    plot.title = element_text(size = 18, color = "blue"),
    plot.subtitle = element_text(size = 14, color = "gold")
  ) +
  coord_flip()