# Tidy Tuesday Week 22: Mario Kart 64 World Records (data from Mario Kart World Records)

# Load libraries
library(tidyverse)
library(ggtext)
library(ggpubr)
library(ggstatsplot)
library(gt)
library(ggsci)
library(wesanderson)

# Use the minimal ggplot2 theme for every plot in this script.
theme_set(theme_minimal())

# Import data ----
# The column types below are the ones readr reported when it guessed them for
# these two files. Declaring them explicitly keeps the import quiet and makes
# the expected schema part of the script.
records <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-25/records.csv",
  col_types = readr::cols(
    track = readr::col_character(),
    type = readr::col_character(),
    shortcut = readr::col_character(),
    player = readr::col_character(),
    system_played = readr::col_character(),
    date = readr::col_date(format = ""),
    time_period = readr::col_character(),
    time = readr::col_double(),
    record_duration = readr::col_double()
  )
)

drivers <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-25/drivers.csv",
  col_types = readr::cols(
    position = readr::col_double(),
    player = readr::col_character(),
    total = readr::col_double(),
    year = readr::col_double(),
    records = readr::col_double(),
    nation = readr::col_character()
  )
)
# Which Mario Kart Track Is The Fastest?
# shared on [Twitter](https://twitter.com/leeolney3/status/1396980461134614528/photo/1)

# Fastest (minimum) record time for every track / race type / shortcut
# combination, plus a combined label such as "Three Lap with Shortcut"
# (rows without a shortcut keep the plain race type as their label).
table1 <- records %>%
  group_by(track, type, shortcut) %>%
  summarise(time_min = min(time), .groups = "drop") %>%
  mutate(
    type_sc = ifelse(shortcut == "Yes", paste(type, "with Shortcut"), type)
  )

# Dot plot: fastest time per track, one point per race type / shortcut label.
# Tracks are ordered by their fastest time and the axes are flipped so the
# track names read horizontally; a grey line joins the points of each track.
table1 %>%
  mutate(
    track_by_time = fct_rev(fct_reorder(track, time_min, .fun = "min")),
    race_kind = factor(
      type_sc,
      levels = c(
        "Three Lap with Shortcut",
        "Single Lap with Shortcut",
        "Single Lap",
        "Three Lap"
      )
    )
  ) %>%
  ggplot(aes(x = track_by_time, y = time_min, color = race_kind)) +
  geom_line(aes(group = track), color = "grey", size = 2, alpha = 0.4) +
  #geom_point(position=position_dodge(0.4),size=2.2) +
  geom_point(
    position = position_jitterdodge(dodge.width = 0.5, jitter.height = 0.5),
    size = 2.7
  ) +
  scale_color_manual(values = c("#f95738", "#ffa62b", "#0091ad", "#43B047")) +
  coord_flip() +
  labs(
    color = "",
    x = "**Track**",
    y = "**Minimum Time** (in seconds)<br>",
    title = "Which Mario Kart Track Is The Fastest?",
    caption = "Tidy Tuesday Week 22 | Data from Mario Kart World Records"
  ) +
  theme(
    legend.position = "top",
    legend.justification = "left",
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 18),
    plot.margin = ggplot2::margin(1, 1, 0.5, 1, "cm"),
    axis.title.x = element_markdown(size = 10.5),
    axis.title.y = element_markdown(size = 10.5)
  )


# For how many tracks have shortcuts been discovered?
# Reduce to one row per track / shortcut pair, then count the tracks for each
# shortcut value. Chaining two count() calls made the second one store its
# result in `nn`, because a column named `n` already existed.
records %>%
  distinct(track, shortcut) %>%
  count(shortcut, name = "n_tracks")
# For which track did the world record improve the most?
# Improvement is measured inside one record category (track + race type +
# shortcut use). Grouping by track alone would subtract a single-lap time from
# a three-lap time, which is not a record improvement.
# Returns the single category with the largest relative improvement.
records %>%
  group_by(track, type, shortcut) %>%
  summarise(min_time = min(time), max_time = max(time), .groups = "drop") %>%
  mutate(
    improved = max_time - min_time,
    improved_pct = round(improved / max_time * 100, 3)
  ) %>%
  arrange(desc(improved_pct)) %>%
  slice(1)
# On which track does the shortcut save the most time?
# Mean record time per track and race type, with the "No" and "Yes" shortcut
# means side by side. Rows without a shortcut mean are dropped, and the rest
# are sorted by the time the shortcut saves (largest first).
# The result stays grouped by track and type, as summarise() leaves it.
table2 <- records %>%
  group_by(track, type, shortcut) %>%
  summarise(time = mean(time), .groups = "drop_last") %>%
  pivot_wider(names_from = shortcut, values_from = time) %>%
  filter(!is.na(Yes)) %>%
  mutate(time_saved = No - Yes) %>%
  arrange(desc(time_saved))

# Time saved by race type: summary statistics of `time_saved` per race type.
by(table2$time_saved, table2$type, summary)
# Output from the original run (no difference in Single Lap with and without
# shortcut):
#
# table2$type: Single Lap
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#       0       0       0       0       0       0
# -------------------------------------------------------------------------------
# table2$type: Three Lap
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#   1.516  38.977  59.911  78.928 102.340 242.578
# On which track does the shortcut save the most time?
# Dot plot: three lap

# Mean three-lap time per track and shortcut use, excluding four tracks.
# NOTE(review): the excluded tracks are presumably the ones without a three-lap
# shortcut record -- confirm against the data.
# summarise() leaves the rows grouped by track and type, so lag(time) looks at
# the previous row of the same track. This relies on the "No" row coming
# before the "Yes" row; time_saved and pct are NA on the first row of a track.
table4 <- records %>%
  group_by(track, type, shortcut) %>%
  summarise(time = mean(time), .groups = "drop_last") %>%
  filter(type == "Three Lap") %>%
  filter(
    track != "Banshee Boardwalk", track != "Bowser's Castle",
    track != "Koopa Troopa Beach", track != "Moo Moo Farm"
  ) %>%
  mutate(time_saved = lag(time) - time) %>%
  mutate(pct = round(time_saved / lag(time), 3))

# Only the with-shortcut rows carry a percentage label.
table5 <- table4 %>% filter(shortcut == "Yes")

table5 %>%
  ggplot(aes(y = reorder(track, pct), x = time)) +
  # `accuracy` is a plain number; the former `0.1L` is not a valid integer
  # literal and made R emit a parse warning.
  geom_text(
    aes(label = paste0("-", scales::percent(pct, accuracy = 0.1))),
    size = 2.9, hjust = 1.2, color = "#3C5488FF"
  ) +
  geom_point(data = table4, aes(color = shortcut), size = 2, show.legend = FALSE) +
  geom_line(data = table4, aes(group = track), alpha = 0.3, size = 1) +
  scale_x_continuous(limits = c(0, 370)) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.title.x = element_markdown(size = 10),
    axis.title.y = element_markdown(size = 10),
    plot.title.position = "plot",
    plot.subtitle = element_markdown(size = 10)
  ) +
  labs(
    x = "**Average time** (in seconds)",
    y = "**Track**",
    title = "On which track the shortcut saves the most time for 3-lap?",
    subtitle = "Percentage is expressed as (<span style = 'color:#E64B35FF'>No Shortcut</span> - <span style = 'color:#3C5488FF'>With Shortcut</span>) / <span style = 'color:#E64B35FF'>No Shortcut</span><br>"
  ) +
  scale_color_manual(values = c("#E64B35FF", "#3C5488FF"))


# On which track does the shortcut save the most time?
# Highlighted slope chart: three lap

# Long form of the three-lap rows of table2: one row per track and shortcut
# state ("No Shortcut" / "With Shortcut"), with times rounded to 0.1 s.
table3 <- table2 %>%
  filter(type == "Three Lap") %>%
  pivot_longer(No:Yes) %>%
  mutate(name = ifelse(name == "No", "No Shortcut", "With Shortcut")) %>%
  mutate(value = round(value, 1), time_saved = round(time_saved, 1))

# One line per track from the no-shortcut mean to the with-shortcut mean;
# Wario Stadium is drawn in red and labelled with its two values.
table3 %>%
  ggplot(aes(x = name, y = value, group = track)) +
  geom_line(aes(color = I(ifelse(track == "Wario Stadium", '#E64B35FF', '#8491b4FF')))) +
  geom_point(aes(color = I(ifelse(track == "Wario Stadium", '#E64B35FF', '#8491b4FF')))) +
  theme(legend.position = "none") +
  scale_x_discrete(position = "top") +
  geom_text(
    data = table3 %>% filter(track == "Wario Stadium") %>% filter(name == "No Shortcut"),
    aes(label = paste0(value, "s")), size = 3, color = "#E64B35FF", hjust = 1.2
  ) +
  geom_text(
    data = table3 %>% filter(track == "Wario Stadium") %>% filter(name == "With Shortcut"),
    aes(label = paste0(value, "s")), size = 3, color = "#E64B35FF", hjust = -0.5
  ) +
  # The label is a constant, so without check_overlap it would be drawn once
  # per row of table3, all on top of each other.
  geom_text(
    aes(x = "No Shortcut", y = 265.2, label = "Wario Stadium"),
    size = 3, color = "#E64B35FF", hjust = 1.7, check_overlap = TRUE
  ) +
  theme(
    axis.text.x = element_text(face = "bold", size = 10, color = "black"),
    axis.title.y = element_markdown(size = 10),
    plot.title.position = "plot"
  ) +
  # The y axis shows the mean time itself, not the time saved.
  labs(
    x = "", y = "**Average time** (in seconds)",
    title = "On which track the shortcut saves the most time for 3-lap?"
  )


# When were shortcuts discovered?
# Timeline of the earliest with-shortcut record date per track. Every track is
# placed on the same horizontal line (y = 1); the track names, dates and
# bracket positions below are hard-coded for this data set.
records %>%
  group_by(track, shortcut) %>%
  summarise(sc_dis = min(date), .groups = "drop") %>%
  filter(shortcut == "Yes") %>%
  # Constant numeric y position for the timeline (previously built with an
  # ifelse() that mixed a number and a string).
  mutate(shortcut = 1) %>%
  ggplot(aes(y = shortcut, x = sc_dis)) +
  geom_segment(aes(x = min(sc_dis), xend = max(sc_dis), y = shortcut, yend = shortcut)) +
  geom_point(aes(color = factor(sc_dis)), size = 6, show.legend = FALSE, shape = 18) +
  geom_text(aes(label = sc_dis), size = 3, vjust = 3, color = "black") +
  # The three labels below are constants; check_overlap keeps ggplot2 from
  # drawing each of them once per data row on top of itself.
  geom_text(aes(x = min(sc_dis), label = "Luigi Raceway", y = 1.03),
            size = 3, color = "#E64B35FF", check_overlap = TRUE) +
  geom_text(aes(x = as.Date(c("1997-03-07")), label = "Rainbow Road\nYoshi Valley", y = 1.048),
            size = 3, color = "#00A087FF", hjust = 0, nudge_x = -1.6, check_overlap = TRUE) +
  geom_text(aes(x = as.Date(c("1997-03-10")), label = "Choco Mountain\nD.K.'s Jungle Parkway\nFrappe Snowland\nKalimari Desert\nMario Raceway\nRoyal Raceway\nSherbet Land\nToad's Turnpike\nWario Stadium", y = 1.125),
            size = 3, hjust = 0, nudge_x = -1.1, color = "#3C5488FF", check_overlap = TRUE) +
  scale_y_continuous(limits = c(0.85, 1.35)) +
  scale_x_date(labels = scales::date_format("%d-%m-%Y"), limits = as.Date(c('1997-02-13', '1997-03-14')),
               expand = c(0, 0), breaks = "1 week") +
  theme(axis.text = element_blank(),
        axis.title = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.major.x = element_line(size = 0.35)) +
  geom_bracket(xmin = as.Date(c("1997-02-16")), xmax = as.Date(c("1997-03-07")), y.position = 1.1,
               label = "19 days", label.size = 3) +
  geom_bracket(xmin = as.Date(c("1997-03-07")), xmax = as.Date(c("1997-03-10")), y.position = 1.25,
               label = "3 days", label.size = 3) +
  # One colour per distinct discovery date (three dates are expected).
  scale_color_manual(values = c("#E64B35FF", "#00A087FF", "#3C5488FF")) +
  labs(title = "When were shortcuts discovered?")

# Which is the longest standing world record?
# Sort by record duration, longest first, and keep the top row.
records %>%
  arrange(desc(record_duration)) %>%
  head(n = 1)
# Distribution of record duration across race types
# Draws `record_duration` (y) for each level of `type` (x) with
# ggstatsplot::ggbetweenstats(). `results.subtitle = FALSE` is passed, so the
# statistical-results subtitle is not requested; the raw points are drawn
# semi-transparent (alpha 0.4) with a jitter-dodge position.
# NOTE(review): `plotgrid.args` and `messages` look like arguments of other or
# older ggstatsplot functions -- confirm they are still accepted by the
# installed version.
# The two manual colours are applied to the colour scale of the returned plot.
ggbetweenstats(data=records, x=type, y=record_duration,
               title="Distribution of record duration across race types",
               type="np",
               plotgrid.args=list(nrow=1),
               messages=FALSE,
               results.subtitle = FALSE,
               xlab="Type",
               ylab="Record duration",
               point.args = list(position = ggplot2::position_jitterdodge(dodge.width = 0.6), 
                                 alpha= 0.4, size = 2, stroke = 0)) + 
  ggplot2::scale_color_manual(values=c("#4DBBD5FF","#00A087FF"))

# How did the world records develop over time?
# count of world records over time
#records %>% 
  #mutate(type_sc= ifelse(shortcut=="Yes", paste(type, "with","Shortcut"),type)) %>%
  #group_by(date) %>% tally() %>% ggscatterhist(x="date",y="n", size=1, margin.plot="density")


# How did the world records develop over time?
# Count of records per date, split by race type / shortcut label, with one
# facet per label.
records %>%
  mutate(type_sc = ifelse(shortcut == "Yes", paste(type, "with", "Shortcut"), type)) %>%
  count(date, type_sc) %>%
  ggplot(aes(x = date, y = n, color = type_sc)) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_point(size = 1, show.legend = FALSE, alpha = 0.9) +
  facet_wrap(~type_sc, ncol = 2) +
  scale_color_npg() +
  theme(
    panel.grid.minor = element_blank(),
    axis.title.x = element_markdown(size = 10),
    axis.title.y = element_markdown(size = 10),
    plot.title.position = "plot",
    strip.text = element_text(face = "bold", color = "#343a40")
  ) +
  labs(x = "**Date**", y = "**Record count**", title = "Records over time by lap type and shortcut")

# Which is the longest standing world record?
# Keeps every record whose duration equals the maximum (ties included).
records %>%
  filter(record_duration == max(record_duration))

# Who is the player with the most world records?
# Sort by `total` explicitly instead of trusting the row order of the file.
# NOTE(review): `total` is taken to be each player's overall record count.
drivers %>%
  distinct(player, total) %>%
  arrange(desc(total)) %>%
  slice(1)

# Who are recent players? (players in 2021)
# The cut-off is an explicit Date rather than a string compared to a Date.
records %>%
  filter(date > as.Date("2020-12-31")) %>%
  group_by(player) %>%
  summarise(latest_date = max(date), record_count = length(player))
# Unique and new player count across the years
# reference:https://jack-davison.github.io/posts/2021-05-25-exploring-fun-questions-tidytuesday-2021-week-22-mario-kart-64/

# One row per player and year, with rows containing missing values dropped.
# Shared by d1 and d2.
player_years <- drivers %>%
  distinct(player, year, records) %>%
  drop_na()

# Number of distinct players per year.
d1 <- player_years %>%
  group_by(year) %>%
  summarise(unique_player = n_distinct(player))

# Number of players whose first listed year is that year.
d2 <- player_years %>%
  group_by(player) %>%
  filter(year == min(year)) %>%
  ungroup() %>%
  count(year) %>%
  rename(new_player = n)

# Years without any new player have no row in d2; after the join they become 0.
d1 %>%
  left_join(d2, by = "year") %>%
  replace(is.na(.), 0) %>%
  pivot_longer(unique_player:new_player) %>%
  mutate(name = ifelse(name == "unique_player", "Unique Player", "New Player")) %>%
  ggplot(aes(x = year, y = value, fill = value)) +
  geom_col() +
  facet_grid(~fct_rev(name)) +
  scale_x_continuous(
    # na.rm guards the axis breaks against a missing year in the raw table.
    breaks = seq(min(drivers$year, na.rm = TRUE), max(drivers$year, na.rm = TRUE), 4)
  ) +
  scale_fill_gradientn(colours = wes_palette("Zissou1", 25, type = "continuous")) +
  theme(
    panel.grid.minor = element_blank(),
    axis.title.x = element_markdown(size = 10),
    axis.title.y = element_markdown(size = 10),
    plot.title.position = "plot",
    legend.position = "none",
    strip.text = element_text(face = "bold", size = 10),
    axis.text.x = element_text(vjust = 5)
  ) +
  labs(
    x = "**Year**", y = "**Count**",
    title = "Unique and new player count across the years\n"
  )

# Which is the fastest track?
# Interactive DT table of the fastest time per track, one column per race
# type, with an initial ascending sort on column index 1 of the `order` option.
records %>%
  group_by(type, track) %>%
  summarise(min_time = min(time), .groups = "drop") %>%
  pivot_wider(names_from = type, values_from = min_time) %>%
  rename(Track = track) %>%
  DT::datatable(
    rownames = FALSE,
    options = list(order = list(list(1, "asc")))
  )
# Maximum record duration by track and lap type
# reference: https://twitter.com/Juanma_MN/status/1397249648931360768/photo/1
# One point per track and race type at the longest record duration, joined by
# a grey line per track; tracks are ordered by their overall maximum.
records %>%
  group_by(track, type) %>%
  summarise(max_duration = max(record_duration), .groups = "drop_last") %>%
  ggplot(aes(x = max_duration, y = reorder(track, max_duration, max), color = type)) +
  geom_point(size = 2) +
  geom_line(aes(group = track), color = "grey") +
  scale_color_manual(values = c("#E64B35FF", "#3C5488FF")) +
  labs(
    title = "Maximum record duration of <span style = 'color:#E64B35FF'>Single Lap</span> and <span style = 'color:#3C5488FF'>Three Lap</span> races, by Track",
    subtitle = "",
    x = "**Maximum record duration** (in days)",
    y = "**Track**"
  ) +
  theme(
    plot.title = element_markdown(),
    plot.title.position = "plot",
    axis.title.x = element_markdown(size = 10),
    axis.title.y = element_markdown(size = 10),
    legend.position = "none"
  )

