2024 F1 Fantasy League Weekly Summary Document

Summary

This is the running document of F1 Fantasy Stats for the 2024 season.

Race Results

Individual race results are below

Code

DT::datatable(results,
              filter = "top",
              extensions = "Buttons",
              options = list(
                buttons = c("csv", "pdf"),
                pageLength = 10,
                searchCols = list(list(search = ''))
              )
  )

Scores & Prizes

Code

race_prizes <- results |>
  filter(Player != "NA") |>
  group_by(Grand_Prix, Player) %>%
  summarize(Total_Points = sum(Total)) %>%
  group_by(Grand_Prix) %>%
  mutate(
   # rank = rank(-Total_Points),
    Prize = case_when(
    rank(-Total_Points) == 1 ~ 30,
    rank(-Total_Points) == 2 ~ 20,
    rank(-Total_Points) == 3 ~ 10,
    rank(-Total_Points) == 1.5 ~ 25,
    rank(-Total_Points) == 2.5 ~ 15,
    rank(-Total_Points) == 3.5 ~ 5,
    TRUE ~ 0
  )) %>% suppressMessages() #fix ties next

DT::datatable(race_prizes,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))

Code

saveRDS(race_prizes, "data/race_prizes.rds")

Code

cumulative_score <- results |>
  filter(Player != "NA") |>
  group_by(Player) %>%
  summarize(Total_Score = sum(Total)) %>% 
  arrange(-Total_Score) %>%
  suppressMessages()

DT::datatable(cumulative_score,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))

Code

total_prize <- race_prizes %>%
  group_by(Player) %>%
  summarize(Total_Prize = sum(Prize)) %>%
  arrange(-Total_Prize) %>%
  suppressMessages()

DT::datatable(total_prize,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))

Points Summaries

Mean and 95% CI

Code

plot_data_draft <- results |>
  mutate(Draft_Pos = paste0(Draft_Round,".",Draft_Pos),
         Draft_Pos = ifelse(Draft_Round == "NA", "NA", Draft_Pos)) |>
  group_by(Draft_Pos) %>%
  summarize(mean = mean(Total),
            sd = sd(Total),
            min = min(Total),
            n = n(),
            max = max(Total)) %>%
  mutate(#Draft_Pos = factor(Draft_Pos,
         #                      levels = c(as.character(1:10),
         #                                 paste0("PB",1:5))),
         lb = mean - 1.96*sd/sqrt(n), #update later with error cis
         lb = ifelse(lb <= 0, 0, lb),
         ub = mean + 1.96*sd/sqrt(n)) 

p <- ggplot(plot_data_draft, 
       aes(x = Draft_Pos, y = mean, colour = Draft_Pos)) +
  geom_point() +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  ylab("Points")

ggplotly(p)

Code

plot_data_driver <- results %>%
  rowwise() %>%
  mutate(Points = sum(c(Race_Points, Qualifying, Fastest_Lap, Pole),
                      na.rm = TRUE)) %>%
  group_by(Driver) %>%
  summarize(mean = mean(Points),
            sd = sd(Points),
            min = min(Points),
            n = n(),
            max = max(Points)) %>%
  mutate(lb = mean - 1.96*sd/sqrt(n), #update later with error cis
         lb = ifelse(lb <= 0, 0, lb),
         ub = mean + 1.96*sd/sqrt(n)) 

p <- ggplot(plot_data_driver, 
       aes(x = Driver, y = mean, colour = Driver)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 45)) +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  ylab("Points (No PB Bonus)")

ggplotly(p)

Model

[model description by chatGPT (to update), will run model and associated outputs following race 5]

The hierarchical model used in this analysis is structured as follows:

Let \(y_i\) represent the observed data for the \(i\)-th observation, and \(\mu_i\) represent the underlying true value for that observation.

For each observation \(i\) (ranging from 1 to \(N\)), \(\mu_i\) is assumed to follow a normal distribution with mean \(\mu_{\text{group}}[i]\), where \(\mu_{\text{group}}[i]\) is a group-specific mean corresponding to the group of the \(i\)-th observation. The variance of this normal distribution is \(\tau_{\text{group}}[i]\), representing the group-specific variance.

Additionally, the observed data \(y_i\) is assumed to follow a normal distribution with mean \(\mu_i\) and a common variance parameter \(\tau\).

The group-specific means \(\mu_{\text{group}}[i]\) are assumed to follow a normal distribution with mean \(\mu_{\text{prior}}\) and variance \(\tau_{\mu}\).

The hyperparameters \(\sigma\) and \(\sigma_{\mu}\) represent the standard deviations for the common variance \(\tau\) and the group-specific means’ variance \(\tau_{\mu}\) respectively.

The priors for \(\sigma\) and \(sigma_{\mu}\) are specified as uniform distributions between 0 and 100.

The prior for \(\mu_{\text{prior}}\) is assumed to be a normal distribution with mean 0 and a very small variance to reflect little prior information about the group means.

The hierarchical model aims to estimate the group-specific means and variances, as well as the common variance parameter, using the observed data and specified prior distributions.

Model summary statistics by player:

Code

#fit model
results_jags <- results %>%
  group_by(Grand_Prix, Player) %>%
  summarise(points = sum(Total)) %>%
  filter(Player != "NA") |>
  suppressMessages() 

jags_data <- list(
  player = as.integer(factor(results_jags$Player)),
  y = results_jags$points,
  N = length(results_jags$points),
  J = length(unique(results_jags$Player))
)

model_text <- "
model {
  for (i in 1:N) {
    mu[i] ~ dnorm(mu_group[player[i]], tau_group[player[i]]) # group-specific means
    y[i] ~ dnorm(mu[i], tau) #T(0, )                                 # likelihood
  }

  # Group priors
  for (j in 1:J) {
    mu_group[j] ~ dnorm(mu_prior, tau_mu) #T(0, )    # prior for group mean
    tau_group[j] ~ dgamma(0.001, 0.001)         # prior for group variance (precision)
  }

  # Priors
  tau <- pow(sigma, -2)
  sigma ~ dunif(0, 100)
  tau_mu <- pow(sigma_mu, -2)
  sigma_mu ~ dunif(0, 100)

  mu_prior ~ dnorm(0, 0.001)

}
"


model <- jags.model(textConnection(model_text),
                    data = jags_data,
                    n.chains = 1,
                    quiet = T,
                    inits = list(.RNG.seed = seed,
                                 .RNG.name = "base::Super-Duper"))
update(model, n.iter = 1e5)
mcmc <- coda.samples(model = model, variable.names = c("mu_group"), n.iter = 1e4)
samples <- lapply(mcmc, as_tibble) %>% bind_rows()

#plots
plot_samples <- samples %>%
  pivot_longer(cols = everything()) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         ))

mcmc_stats <- plot_samples %>% group_by(player) %>%
  summarize(
    mean = mean(value),
    sd = sd(value),
    lb = quantile(value, 0.025),
    ub = quantile(value, 0.975)
  ) %>%
  rename(Player = player)

obs_stats <- results_jags %>%
  group_by(Player) %>%
  summarize(mean_obs = mean(points),
            sd_obs = sd(points),
            n = n())

stats <- full_join(obs_stats, mcmc_stats, by = "Player") %>%
  select(
    Player,
    "Player Mean" = mean_obs,
    "Player SD" = sd_obs,
    "Model Mean" = mean,
    "Model SD" = sd,
    "95% CrI LB" = lb,
    "95% CrI UB" = ub
  )

plot_stats <- full_join(obs_stats, mcmc_stats, by = "Player")  %>%
  mutate(lb_obs = mean_obs - 1.96*sd_obs/sqrt(n),
         ub_obs = mean_obs + 1.96*sd_obs/sqrt(n)) %>%
  select(Player, mean_obs, lb_obs, ub_obs, mean, lb, ub)
plot_obs_stats <- plot_stats %>%
  select(Player, mean = mean_obs, lb = lb_obs, ub = ub_obs) %>%
  mutate(stat = "Observed")
plot_model_stats <- plot_stats %>%
  select(Player, mean, lb = lb, ub = ub) %>%
  mutate(stat = "Modeled")
plot_stats_full <- bind_rows(plot_obs_stats, plot_model_stats)

forest_plot <- ggplot(plot_stats_full,
                      aes(x = Player, y = mean, colour = stat)) +
  geom_point(position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = lb, ymax = ub),
                width = 0.1,
                position=position_dodge(width=0.5)) +
  geom_hline(yintercept = mean(obs_stats$mean_obs), linetype = "dashed") +
  coord_flip()

ranks <- samples %>%
  rowwise() %>%
  mutate(rank1 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[1],
         rank2 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[2],
         rank3 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[3],
         rank4 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[4],
         rank5 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[5],
         rank6 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[6]) %>%
  pivot_longer(rank1:rank6) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         )) %>%
  group_by(player) %>%
  summarize("1" = mean(value == 1),
            "2" = mean(value == 2),
            "3" = mean(value == 3),
            "4" = mean(value == 4),
            "5" = mean(value == 5),
            "6" = mean(value == 6)) %>%
  pivot_longer("1":"6") %>%
  rename(Position = name)

density_plot <- ggplot(plot_samples,
                       aes(x = value, colour = player, fill = player)) +
  geom_density(alpha = 0.5, adjust = 4)

rank_plot <- ggplot(ranks,
                    aes(x = Position, y = value, colour = Position, fill = Position)) +
  geom_bar(stat = "identity", alpha = 0.75) +
  facet_wrap(~player) +
  geom_text(aes(label = round(value, 2)), colour = "black", size = 4)

DT::datatable(stats,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf"))) %>%
  formatRound(2:7, 2)

Summary Plots

Code

forest_plot

Code

density_plot

Code

rank_plot

All Prizes

[To add at end of year]

Code

# final_prize <- total_prize %>%
#   mutate(Most_Extreme = ifelse(Player == "Akshata", 20, 0),
#          Most_Average = ifelse(Player == "Kassi", 20, 0),
#          Second_Place = ifelse(Player == "Kevin", 100, 0),
#          First_Place = ifelse(Player == "Mitch", 200, 0)
#   ) %>% mutate(
#     Total_Prize = rowSums(select(., -Player))
#   )
# 
# entry_cost <- sum(final_prize$Total_Prize)/nrow(final_prize)
# 
# final_prize <- final_prize %>%
#   mutate(Entry_Cost = entry_cost,
#          Cumulative_Total = Total_Prize - Entry_Cost)
# 
# DT::datatable(final_prize %>% select(Player:First_Place),
#               filter = "top",
#               extensions = "Buttons",
#               options = list(buttons = c("csv", "pdf")))

Final Prize

Code

# DT::datatable(final_prize %>% select(c(Player, Total_Prize, Entry_Cost, Cumulative_Total)),
#               filter = "top",
#               extensions = "Buttons",
#               options = list(buttons = c("csv", "pdf")))

Next Draft Order

Code

sample <- samples[sample(1:nrow(samples), 2),]
draft_order <- bind_rows(rank(sample[1,]), rank(sample[2,])) %>%
  mutate(rnd = 1:2) %>%
  pivot_longer(cols = starts_with("mu")) %>%
  arrange(rnd, value) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         )) %>%
  select(rnd, pick = value, player)
draft_order$draft_score <- 1:nrow(draft_order)

## temporary code for first five races
# players <- c("Akshata", "Eric", "Forrest", "Kevin", "Melissa", "Mitch")
# rnd1 <- sample(players, length(players), replace = FALSE)
# rnd2 <- sample(players, length(players), replace = FALSE)
# draft_order <- tibble(
#   rnd = rep(c(1, 2), each = length(players)),
#   pick = rep(1:length(players), 2),
#   player = c(rnd1, rnd2)
# )
# draft_order$draft_score <- 1:nrow(draft_order)

draft_pb <- draft_order %>%
  group_by(player) %>%
  summarize(draft_sum = sum(pick),
            draft_rnd1 = min(draft_score)) %>%
  rowwise() %>%
  arrange(-draft_sum, -draft_rnd1) %>%
  select(player) %>%
  ungroup() %>%
  mutate(rnd = 3,
         pick = row_number())

draft_order <- draft_order |> select(-draft_score)

draft_order_full <- bind_rows(
  draft_order, draft_pb
)

knitr::kable(draft_order_full)

rnd	pick	player
1	1	Kevin
1	2	Mitch
1	3	Forrest
1	4	Akshata
1	5	Melissa
1	6	Eric
2	1	Mitch
2	2	Kevin
2	3	Eric
2	4	Forrest
2	5	Akshata
2	6	Melissa
3	1	Melissa
3	2	Eric
3	3	Akshata
3	4	Forrest
3	5	Mitch
3	6	Kevin

Code

saveRDS(draft_order_full, "draft_order.rds")

Reminder - can’t draft list:

Code

last_race <- results$Grand_Prix[nrow(results)]

cant_draft <- results %>%
  filter(Grand_Prix == last_race) %>%
  select(Player, "Can't Draft" = Driver) %>%
  arrange(Player) 

cant_draft %>% knitr::kable()

Player	Can’t Draft
Akshata	Leclerc
Akshata	Stroll
Akshata	Bottas
Eric	Norris
Eric	Alonso
Eric	Gasly
Forrest	Hamilton
Forrest	Perez
Forrest	Ocon
Kevin	Verstappen
Kevin	Sainz
Kevin	Sargent
Melissa	Piastri
Melissa	Tsunoda
Melissa	Albon
Mitch	Russell
Mitch	Hulkenburg
Mitch	Ricciardo
NA	Magnussen
NA	Guanyu

Code

#cant_draft <- NULL #temporary for week 1
saveRDS(cant_draft, "cant_draft.rds")