#Downloading data

library(tidytuesdayR)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Fetch the TidyTuesday release for 2025, week 38 (tidytuesdayR is attached
# above, so tt_load() is called without the namespace prefix).
tuesdata <- tt_load(2025, week = 38)
## ---- Compiling #TidyTuesday Information for 2025-09-23 ----
## --- There are 2 files available ---
## 
## 
## ── Downloading files ───────────────────────────────────────────────────────────
## 
##   1 of 2: "fide_ratings_august.csv"
##   2 of 2: "fide_ratings_september.csv"

# Pull each monthly ratings table out of the downloaded bundle by name.
fide_ratings_august <- tuesdata[["fide_ratings_august"]]
fide_ratings_september <- tuesdata[["fide_ratings_september"]]

# Inspect the September table: open it in the data viewer, then print a
# compact column-by-column summary.
fide_ratings_september |> view()
fide_ratings_september |> glimpse()
## Rows: 203,191
## Columns: 12
## $ id     <dbl> 53707043, 53200465, 5716365, 53200553, 5045886, 10291695, 10287…
## $ name   <chr> "A Darshil", "A F M Ehteshamul, Hoque (tuhin", "A Hamid, Harman…
## $ fed    <chr> "IND", "BAN", "MAS", "BAN", "IND", "BAN", "BAN", "BAN", "IND", …
## $ sex    <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M"…
## $ title  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "IM", NA, NA, NA, N…
## $ wtitle <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ otitle <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ foa    <chr> NA, NA, NA, NA, NA, NA, NA, NA, "AFM", NA, NA, NA, NA, NA, NA, …
## $ rating <dbl> 1412, 1797, 1552, 1607, 1747, 1614, 1664, 1802, 1644, 1871, 169…
## $ games  <dbl> 0, 0, 0, 0, 0, 0, 8, 5, 0, 9, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, …
## $ k      <dbl> 40, 40, 20, 40, 20, 40, 40, 40, 40, 20, 20, 10, 20, 20, 40, 40,…
## $ bday   <dbl> 2013, 1977, 1970, 1995, 1964, 2008, 2006, 2015, 2013, 1978, 195…

#Q What is the distribution of players by Federation?

# Bar chart with one bar per federation code; bar height is the number of
# rows (players) carrying that code.
ggplot(fide_ratings_september, aes(x = fed)) +
  geom_bar()

#Whoa! That's a whole lot of federations. Just how many federations do players come from?
#Create a frequency table to find out.

# Tally the rows per federation code, then order the tally from the largest
# count to the smallest and open the result in the data viewer.
freq_table <- fide_ratings_september |>
  pull(fed) |>
  table()
sorted_table <- freq_table |> sort(decreasing = TRUE)
sorted_table |> view()

#A Spain had the highest number of players in this list!

#Q What is the age and gender distribution of the players?
#We need to compute each player's age first.

# Add an `age` column: the number of years between each player's birth year
# (`bday`) and 2025, the year passed to tt_load() for this data set.
#
# The subtraction is written as `2025 - bday` so the result is already
# positive and no abs() is needed. Wrapping `bday - 2025` in abs() would also
# turn a birth year later than 2025 (bad data) into a plausible-looking age
# instead of a visibly negative one.
new_sept <- fide_ratings_september |>
  mutate(age = 2025 - bday)

# Histogram of player age in 10-year bins, split by sex.
#
# `sex` is mapped to `fill` rather than `colour`: for bars, `colour` only
# changes the outline and leaves every bar filled grey, which makes the two
# groups hard to tell apart. `fill` colours the bars themselves.
new_sept |>
  ggplot(aes(x = age, fill = sex)) +
  geom_histogram(binwidth = 10)

#A Most players are young and male!
#Q What is the number of players who played in September?

# Number of players for each distinct value of `games`; the row for
# games == 0 is the number of players with no games recorded.
count(new_sept, games)
## # A tibble: 38 × 2
##    games      n
##    <dbl>  <int>
##  1     0 158441
##  2     1   2056
##  3     2   1834
##  4     3   2505
##  5     4   3343
##  6     5   5222
##  7     6   4930
##  8     7   5636
##  9     8   4838
## 10     9   7832
## # ℹ 28 more rows

#A 158,441 of 203,191 players didn’t play; 44,750 did.
#Q Who are the top ten players, and what are their federations and ratings?

# All players ordered from the highest rating to the lowest, keeping only the
# name, federation and rating columns. The table is not truncated, so the
# first ten rows are the ten highest-rated players.
arranged_new_sept <- new_sept |>
  arrange(desc(rating)) |>
  select(name, fed, rating)

#Q Who are the top ten women, and what are their federations and ratings?

 # Same ordering as for all players, restricted to rows where sex is "F":
 # highest rating first, keeping only name, federation and rating.
 women_new_sept <- new_sept |>
   filter(sex == "F") |>
   arrange(desc(rating)) |>
   select(name, fed, rating)

#Q Which players hold at least one of the three title types (title, wtitle, otitle)?

# Players holding at least one title: keep rows where `title`, `wtitle` or
# `otitle` is not missing.
#
# Missing titles are real NA values (see the glimpse() output), so they are
# tested with is.na(). Comparing against the string "NA" yields NA rather than
# FALSE for missing entries and only gave the right rows because subset()
# drops rows whose condition is NA. filter() is used instead of base subset()
# to stay consistent with the rest of the dplyr pipelines in this script.
titles_sept <- new_sept |>
  select(name, fed, title, wtitle, otitle) |>
  filter(!is.na(title) | !is.na(wtitle) | !is.na(otitle))

#Q Which players hold all three title types (title, wtitle, and otitle)?

# Players holding all three title types: keep rows where `title`, `wtitle`
# and `otitle` are all non-missing.
#
# Missing titles are real NA values (see the glimpse() output), so they are
# tested with is.na() rather than compared against the string "NA"; that
# comparison returns NA for missing entries and relied on subset() silently
# discarding NA conditions. filter() replaces base subset() for consistency
# with the other dplyr pipelines in this script.
all_titles <- new_sept |>
  select(name, fed, title, wtitle, otitle) |>
  filter(!is.na(title) & !is.na(wtitle) & !is.na(otitle))