# Download the two TidyTuesday (2021-10-26) CSV files straight from GitHub.
# The two tables share the `race_year_id` column used as the join key later.
ultra_rankings <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-10-26/ultra_rankings.csv"
)
race <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-10-26/race.csv"
)
# Consolidate the two datasets available this week and apply some minor cleaning
# Build one table with a row per ranking entry, enriched with its race details.
#
# - `race` is left-joined to `ultra_rankings` on `race_year_id`, so races
#   without any ranking rows are kept (with NA runner fields).
# - `runner` is converted to title case.
# - Any `country` value that merely *contains* "United States", "China" or
#   "Japan" is collapsed to that bare name; other values are left untouched.
# - `year` is extracted from the race `date`.
consolidated <- race %>%
  left_join(ultra_rankings, by = "race_year_id") %>%
  mutate(
    runner = str_to_title(runner),
    # `.*X.*` swallows whatever surrounds the country name, so the whole
    # string is replaced by the canonical spelling.
    country = str_replace(country, ".*United States.*", "United States"),
    country = str_replace(country, ".*China.*", "China"),
    country = str_replace(country, ".*Japan.*", "Japan"),
    year = year(date)
  )
# Animated bar chart of the percentage of women among the rows of
# `consolidated`, per country, with one animation frame per event year.
#
# Steps:
#   1. Drop rows with missing gender or country.
#   2. Count rows and women ("W") per country/year and derive the percentage.
#   3. Keep the 40 country/year combinations with the most rows.
#   4. Draw horizontal bars ordered by the percentage and animate over `year`.
consolidated %>%
  # Filtering first keeps NA genders out of the `gender == "W"` sum below.
  filter(!is.na(gender), !is.na(country)) %>%
  group_by(country, year) %>%
  summarise(
    total = n(),
    women = sum(gender == "W"),
    ratio_women = 100 * (women / total),
    # Drop all grouping: by default summarise() leaves the result grouped by
    # `country`, and mutate() refuses to modify a grouping variable, which
    # made the `fct_reorder()` step below fail.
    .groups = "drop"
  ) %>%
  arrange(desc(total)) %>%
  # Top 40 country/year rows overall (not 40 per country).
  head(40) %>%
  mutate(
    # Order bars by percentage; transition_time() gets an integer year.
    country = fct_reorder(country, ratio_women),
    year = as.integer(year)
  ) %>%
  ggplot(aes(ratio_women, country, fill = country)) +
  geom_col() +
  ease_aes() +
  # scale_x_discrete(labels = percent())+
  labs(
    title = "% of Women Participation in Ultra Trail Running within countries \n with highest # of participants ",
    # `{frame_time}` is filled in by gganimate for each frame.
    subtitle = "Year of events = {frame_time}",
    x = "% Women participation",
    caption = "Plot by @Birasafab"
  ) +
  transition_time(year) +
  # transition_reveal(along = year)+
  theme(legend.position = "none")