2024 F1 Fantasy League Weekly Summary Document

Summary

This is the running document of F1 Fantasy Stats for the 2024 season.

Race Results

Individual race results are below

Code
DT::datatable(results,
              filter = "top",
              extensions = "Buttons",
              options = list(
                buttons = c("csv", "pdf"),
                pageLength = 10,
                searchCols = list(list(search = ''))
              )
  )

Scores & Prizes

Code
race_prizes <- results |>
  filter(Player != "NA") |>
  group_by(Grand_Prix, Player) %>%
  summarize(Total_Points = sum(Total)) %>%
  group_by(Grand_Prix) %>%
  mutate(
   # rank = rank(-Total_Points),
    Prize = case_when(
    rank(-Total_Points) == 1 ~ 30,
    rank(-Total_Points) == 2 ~ 20,
    rank(-Total_Points) == 3 ~ 10,
    rank(-Total_Points) == 1.5 ~ 25,
    rank(-Total_Points) == 2.5 ~ 15,
    rank(-Total_Points) == 3.5 ~ 5,
    TRUE ~ 0
  )) %>% suppressMessages() #fix ties next

DT::datatable(race_prizes,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))
Code
saveRDS(race_prizes, "data/race_prizes.rds")
Code
cumulative_score <- results |>
  filter(Player != "NA") |>
  group_by(Player) %>%
  summarize(Total_Score = sum(Total)) %>% 
  arrange(-Total_Score) %>%
  suppressMessages()

DT::datatable(cumulative_score,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))
Code
total_prize <- race_prizes %>%
  group_by(Player) %>%
  summarize(Total_Prize = sum(Prize)) %>%
  arrange(-Total_Prize) %>%
  suppressMessages()

DT::datatable(total_prize,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))

Points Summaries

Mean and 95% CI

Code
plot_data_draft <- results |>
  mutate(Draft_Pos = paste0(Draft_Round,".",Draft_Pos),
         Draft_Pos = ifelse(Draft_Round == "NA", "NA", Draft_Pos)) |>
  group_by(Draft_Pos) %>%
  summarize(mean = mean(Total),
            sd = sd(Total),
            min = min(Total),
            n = n(),
            max = max(Total)) %>%
  mutate(#Draft_Pos = factor(Draft_Pos,
         #                      levels = c(as.character(1:10),
         #                                 paste0("PB",1:5))),
         lb = mean - 1.96*sd/sqrt(n), #update later with error cis
         lb = ifelse(lb <= 0, 0, lb),
         ub = mean + 1.96*sd/sqrt(n)) 

p <- ggplot(plot_data_draft, 
       aes(x = Draft_Pos, y = mean, colour = Draft_Pos)) +
  geom_point() +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  ylab("Points")

ggplotly(p)
Code
plot_data_driver <- results %>%
  rowwise() %>%
  mutate(Points = sum(c(Race_Points, Qualifying, Fastest_Lap, Pole),
                      na.rm = TRUE)) %>%
  group_by(Driver) %>%
  summarize(mean = mean(Points),
            sd = sd(Points),
            min = min(Points),
            n = n(),
            max = max(Points)) %>%
  mutate(lb = mean - 1.96*sd/sqrt(n), #update later with error cis
         lb = ifelse(lb <= 0, 0, lb),
         ub = mean + 1.96*sd/sqrt(n)) 

p <- ggplot(plot_data_driver, 
       aes(x = Driver, y = mean, colour = Driver)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 45)) +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  ylab("Points (No PB Bonus)")

ggplotly(p)

Model

[model description by chatGPT (to update), will run model and associated outputs following race 5]

The hierarchical model used in this analysis is structured as follows:

Let \(y_i\) represent the observed data for the \(i\)-th observation, and \(\mu_i\) represent the underlying true value for that observation.

For each observation \(i\) (ranging from 1 to \(N\)), \(\mu_i\) is assumed to follow a normal distribution with mean \(\mu_{\text{group}}[i]\), where \(\mu_{\text{group}}[i]\) is a group-specific mean corresponding to the group of the \(i\)-th observation. The variance of this normal distribution is \(\tau_{\text{group}}[i]\), representing the group-specific variance.

Additionally, the observed data \(y_i\) is assumed to follow a normal distribution with mean \(\mu_i\) and a common variance parameter \(\tau\).

The group-specific means \(\mu_{\text{group}}[i]\) are assumed to follow a normal distribution with mean \(\mu_{\text{prior}}\) and variance \(\tau_{\mu}\).

The hyperparameters \(\sigma\) and \(\sigma_{\mu}\) represent the standard deviations for the common variance \(\tau\) and the group-specific means’ variance \(\tau_{\mu}\) respectively.

The priors for \(\sigma\) and \(sigma_{\mu}\) are specified as uniform distributions between 0 and 100.

The prior for \(\mu_{\text{prior}}\) is assumed to be a normal distribution with mean 0 and a very small variance to reflect little prior information about the group means.

The hierarchical model aims to estimate the group-specific means and variances, as well as the common variance parameter, using the observed data and specified prior distributions.

Model summary statistics by player:

Code
#fit model
results_jags <- results %>%
  group_by(Grand_Prix, Player) %>%
  summarise(points = sum(Total)) %>%
  filter(Player != "NA") |>
  suppressMessages() 

jags_data <- list(
  player = as.integer(factor(results_jags$Player)),
  y = results_jags$points,
  N = length(results_jags$points),
  J = length(unique(results_jags$Player))
)

model_text <- "
model {
  for (i in 1:N) {
    mu[i] ~ dnorm(mu_group[player[i]], tau_group[player[i]]) # group-specific means
    y[i] ~ dnorm(mu[i], tau) #T(0, )                                 # likelihood
  }

  # Group priors
  for (j in 1:J) {
    mu_group[j] ~ dnorm(mu_prior, tau_mu) #T(0, )    # prior for group mean
    tau_group[j] ~ dgamma(0.001, 0.001)         # prior for group variance (precision)
  }

  # Priors
  tau <- pow(sigma, -2)
  sigma ~ dunif(0, 100)
  tau_mu <- pow(sigma_mu, -2)
  sigma_mu ~ dunif(0, 100)

  mu_prior ~ dnorm(0, 0.001)

}
"


model <- jags.model(textConnection(model_text),
                    data = jags_data,
                    n.chains = 1,
                    quiet = T,
                    inits = list(.RNG.seed = seed,
                                 .RNG.name = "base::Super-Duper"))
update(model, n.iter = 1e5)
mcmc <- coda.samples(model = model, variable.names = c("mu_group"), n.iter = 1e4)
samples <- lapply(mcmc, as_tibble) %>% bind_rows()

#plots
plot_samples <- samples %>%
  pivot_longer(cols = everything()) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         ))

mcmc_stats <- plot_samples %>% group_by(player) %>%
  summarize(
    mean = mean(value),
    sd = sd(value),
    lb = quantile(value, 0.025),
    ub = quantile(value, 0.975)
  ) %>%
  rename(Player = player)

obs_stats <- results_jags %>%
  group_by(Player) %>%
  summarize(mean_obs = mean(points),
            sd_obs = sd(points),
            n = n())

stats <- full_join(obs_stats, mcmc_stats, by = "Player") %>%
  select(
    Player,
    "Player Mean" = mean_obs,
    "Player SD" = sd_obs,
    "Model Mean" = mean,
    "Model SD" = sd,
    "95% CrI LB" = lb,
    "95% CrI UB" = ub
  )

plot_stats <- full_join(obs_stats, mcmc_stats, by = "Player")  %>%
  mutate(lb_obs = mean_obs - 1.96*sd_obs/sqrt(n),
         ub_obs = mean_obs + 1.96*sd_obs/sqrt(n)) %>%
  select(Player, mean_obs, lb_obs, ub_obs, mean, lb, ub)
plot_obs_stats <- plot_stats %>%
  select(Player, mean = mean_obs, lb = lb_obs, ub = ub_obs) %>%
  mutate(stat = "Observed")
plot_model_stats <- plot_stats %>%
  select(Player, mean, lb = lb, ub = ub) %>%
  mutate(stat = "Modeled")
plot_stats_full <- bind_rows(plot_obs_stats, plot_model_stats)

forest_plot <- ggplot(plot_stats_full,
                      aes(x = Player, y = mean, colour = stat)) +
  geom_point(position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = lb, ymax = ub),
                width = 0.1,
                position=position_dodge(width=0.5)) +
  geom_hline(yintercept = mean(obs_stats$mean_obs), linetype = "dashed") +
  coord_flip()

ranks <- samples %>%
  rowwise() %>%
  mutate(rank1 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[1],
         rank2 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[2],
         rank3 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[3],
         rank4 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[4],
         rank5 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[5],
         rank6 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[6]) %>%
  pivot_longer(rank1:rank6) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         )) %>%
  group_by(player) %>%
  summarize("1" = mean(value == 1),
            "2" = mean(value == 2),
            "3" = mean(value == 3),
            "4" = mean(value == 4),
            "5" = mean(value == 5),
            "6" = mean(value == 6)) %>%
  pivot_longer("1":"6") %>%
  rename(Position = name)

density_plot <- ggplot(plot_samples,
                       aes(x = value, colour = player, fill = player)) +
  geom_density(alpha = 0.5, adjust = 4)

rank_plot <- ggplot(ranks,
                    aes(x = Position, y = value, colour = Position, fill = Position)) +
  geom_bar(stat = "identity", alpha = 0.75) +
  facet_wrap(~player) +
  geom_text(aes(label = round(value, 2)), colour = "black", size = 4)

DT::datatable(stats,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf"))) %>%
  formatRound(2:7, 2)

Summary Plots

Code
forest_plot

Code
density_plot

Code
rank_plot

All Prizes

[To add at end of year]

Code
# final_prize <- total_prize %>%
#   mutate(Most_Extreme = ifelse(Player == "Akshata", 20, 0),
#          Most_Average = ifelse(Player == "Kassi", 20, 0),
#          Second_Place = ifelse(Player == "Kevin", 100, 0),
#          First_Place = ifelse(Player == "Mitch", 200, 0)
#   ) %>% mutate(
#     Total_Prize = rowSums(select(., -Player))
#   )
# 
# entry_cost <- sum(final_prize$Total_Prize)/nrow(final_prize)
# 
# final_prize <- final_prize %>%
#   mutate(Entry_Cost = entry_cost,
#          Cumulative_Total = Total_Prize - Entry_Cost)
# 
# DT::datatable(final_prize %>% select(Player:First_Place),
#               filter = "top",
#               extensions = "Buttons",
#               options = list(buttons = c("csv", "pdf")))

Final Prize

Code
# DT::datatable(final_prize %>% select(c(Player, Total_Prize, Entry_Cost, Cumulative_Total)),
#               filter = "top",
#               extensions = "Buttons",
#               options = list(buttons = c("csv", "pdf")))

Next Draft Order

Code
sample <- samples[sample(1:nrow(samples), 2),]
draft_order <- bind_rows(rank(sample[1,]), rank(sample[2,])) %>%
  mutate(rnd = 1:2) %>%
  pivot_longer(cols = starts_with("mu")) %>%
  arrange(rnd, value) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         )) %>%
  select(rnd, pick = value, player)
draft_order$draft_score <- 1:nrow(draft_order)

## temporary code for first five races
# players <- c("Akshata", "Eric", "Forrest", "Kevin", "Melissa", "Mitch")
# rnd1 <- sample(players, length(players), replace = FALSE)
# rnd2 <- sample(players, length(players), replace = FALSE)
# draft_order <- tibble(
#   rnd = rep(c(1, 2), each = length(players)),
#   pick = rep(1:length(players), 2),
#   player = c(rnd1, rnd2)
# )
# draft_order$draft_score <- 1:nrow(draft_order)

draft_pb <- draft_order %>%
  group_by(player) %>%
  summarize(draft_sum = sum(pick),
            draft_rnd1 = min(draft_score)) %>%
  rowwise() %>%
  arrange(-draft_sum, -draft_rnd1) %>%
  select(player) %>%
  ungroup() %>%
  mutate(rnd = 3,
         pick = row_number())

draft_order <- draft_order |> select(-draft_score)

draft_order_full <- bind_rows(
  draft_order, draft_pb
)

knitr::kable(draft_order_full)
rnd pick player
1 1 Kevin
1 2 Mitch
1 3 Forrest
1 4 Akshata
1 5 Melissa
1 6 Eric
2 1 Mitch
2 2 Kevin
2 3 Eric
2 4 Forrest
2 5 Akshata
2 6 Melissa
3 1 Melissa
3 2 Eric
3 3 Akshata
3 4 Forrest
3 5 Mitch
3 6 Kevin
Code
saveRDS(draft_order_full, "draft_order.rds")

Reminder - can’t draft list:

Code
last_race <- results$Grand_Prix[nrow(results)]

cant_draft <- results %>%
  filter(Grand_Prix == last_race) %>%
  select(Player, "Can't Draft" = Driver) %>%
  arrange(Player) 

cant_draft %>% knitr::kable()
Player Can’t Draft
Akshata Leclerc
Akshata Stroll
Akshata Bottas
Eric Norris
Eric Alonso
Eric Gasly
Forrest Hamilton
Forrest Perez
Forrest Ocon
Kevin Verstappen
Kevin Sainz
Kevin Sargent
Melissa Piastri
Melissa Tsunoda
Melissa Albon
Mitch Russell
Mitch Hulkenburg
Mitch Ricciardo
NA Magnussen
NA Guanyu
Code
#cant_draft <- NULL #temporary for week 1
saveRDS(cant_draft, "cant_draft.rds")