#Downloading data
library(tidytuesdayR)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Fetch the TidyTuesday bundle for week 38 of 2025 (downloads the CSV files).
tuesdata <- tidytuesdayR::tt_load(x = 2025, week = 38)
## ---- Compiling #TidyTuesday Information for 2025-09-23 ----
## --- There are 2 files available ---
##
##
## ── Downloading files ───────────────────────────────────────────────────────────
##
## 1 of 2: "fide_ratings_august.csv"
## 2 of 2: "fide_ratings_september.csv"
# Pull the two monthly rating tables out of the downloaded bundle.
fide_ratings_august <- tuesdata[["fide_ratings_august"]]
fide_ratings_september <- tuesdata[["fide_ratings_september"]]

# Inspect the September table: open it in the data viewer, then print a
# column-by-column summary.
tibble::view(fide_ratings_september)
dplyr::glimpse(fide_ratings_september)
## Rows: 203,191
## Columns: 12
## $ id <dbl> 53707043, 53200465, 5716365, 53200553, 5045886, 10291695, 10287…
## $ name <chr> "A Darshil", "A F M Ehteshamul, Hoque (tuhin", "A Hamid, Harman…
## $ fed <chr> "IND", "BAN", "MAS", "BAN", "IND", "BAN", "BAN", "BAN", "IND", …
## $ sex <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M"…
## $ title <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "IM", NA, NA, NA, N…
## $ wtitle <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ otitle <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ foa <chr> NA, NA, NA, NA, NA, NA, NA, NA, "AFM", NA, NA, NA, NA, NA, NA, …
## $ rating <dbl> 1412, 1797, 1552, 1607, 1747, 1614, 1664, 1802, 1644, 1871, 169…
## $ games <dbl> 0, 0, 0, 0, 0, 0, 8, 5, 0, 9, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, …
## $ k <dbl> 40, 40, 20, 40, 20, 40, 40, 40, 40, 20, 20, 10, 20, 20, 40, 40,…
## $ bday <dbl> 2013, 1977, 1970, 1995, 1964, 2008, 2006, 2015, 2013, 1978, 195…
#Q What is the distribution of players by Federation?
# Bar chart with one bar per federation code; geom_bar() counts the rows
# (players) in each federation.
ggplot(data = fide_ratings_september, mapping = aes(x = fed)) +
  geom_bar()
#Whoa! That's a whole lot of federations. Just how many federations do
#players come from?
#Create a table.
# Count players per federation, then order the counts from largest to
# smallest and open the result in the data viewer.
freq_table <- fide_ratings_september[["fed"]] |>
  table()
sorted_table <- freq_table |>
  sort(decreasing = TRUE)
view(sorted_table)
#A Spain had the highest number of players in this list!
#Q What is the age and gender distribution of the players? #need to find age for this
# Derive each player's age in the 2025 snapshot year from the birth year.
# Subtracting in the natural order (snapshot year minus birth year) gives a
# positive age directly, so no abs() is needed. An impossible birth year after
# 2025 now surfaces as a negative age instead of being silently turned into a
# plausible-looking one.
new_sept <- fide_ratings_september |>
  mutate(age = 2025 - bday)
# Age histogram in 10-year bins, split by sex. `fill` (rather than `colour`)
# is mapped so each bar segment is shaded by sex; on a histogram `colour` only
# tints the bar outlines and leaves every bar the same grey.
new_sept |>
  ggplot(aes(x = age, fill = sex)) +
  geom_histogram(binwidth = 10)
#A Most are young boys!
#Q What is the number of players who played in September?
# Tally how many players fall at each value of `games`.
count(new_sept, games)
## # A tibble: 38 × 2
## games n
## <dbl> <int>
## 1 0 158441
## 2 1 2056
## 3 2 1834
## 4 3 2505
## 5 4 3343
## 6 5 5222
## 7 6 4930
## 8 7 5636
## 9 8 4838
## 10 9 7832
## # ℹ 28 more rows
#A 158441 of 203,191 didn’t play. 44750 did.
#Q Who are the top ten players, their federation and what is their ranking?
# Order every player by rating, highest first; the full ordered table is kept
# in `arranged_new_sept`.
arranged_new_sept <- new_sept |>
  select(name, fed, rating) |>
  arrange(desc(rating))

# The question asks for the top ten only, so print just the first ten rows
# together with their position in the ordering.
arranged_new_sept |>
  slice_head(n = 10) |>
  mutate(rank = row_number(), .before = name)
#Q Who are the top ten women, their federation and what is their ranking?
# Order the female players (sex == "F") by rating, highest first; the full
# ordered table is kept in `women_new_sept`.
women_new_sept <- new_sept |>
  filter(sex == "F") |>
  select(name, fed, rating) |>
  arrange(desc(rating))

# The question asks for the top ten only, so print just the first ten rows
# together with their position in the ordering.
women_new_sept |>
  slice_head(n = 10) |>
  mutate(rank = row_number(), .before = name)
#Q Which players have any of Titles, Wtitle and Otitles?
# Players holding at least one of the three kinds of title. Missing titles are
# real NA values, so missingness is tested with is.na(); comparing against the
# string "NA" only worked because the resulting NA rows happened to be dropped.
titles_sept <- new_sept |>
  select(name, fed, title, wtitle, otitle) |>
  filter(!is.na(title) | !is.na(wtitle) | !is.na(otitle))
#Q Which players have all three of Titles, Wtitle and Otitles?
# Players holding all three kinds of title at once. Missing titles are real NA
# values, so each column is tested with is.na() instead of being compared
# against the string "NA"; filter() keeps a row only when every condition
# is TRUE.
all_titles <- new_sept |>
  select(name, fed, title, wtitle, otitle) |>
  filter(!is.na(title), !is.na(wtitle), !is.na(otitle))