2024 F1 Fantasy League Weekly Summary Document

Summary

This is the running document of F1 Fantasy Stats for the 2024 season.

Race Results

Individual race results are below

Code
DT::datatable(results,
              filter = "top",
              extensions = "Buttons",
              options = list(
                buttons = c("csv", "pdf"),
                pageLength = 10,
                searchCols = list(list(search = ''))
              )
  )

Scores & Prizes

Code
race_prizes <- results |>
  filter(Player != "NA") |>
  group_by(Grand_Prix, Player) %>%
  summarize(Total_Points = sum(Total)) %>%
  group_by(Grand_Prix) %>%
  mutate(
   # rank = rank(-Total_Points),
    Prize = case_when(
    rank(-Total_Points) == 1 ~ 30,
    rank(-Total_Points) == 2 ~ 20,
    rank(-Total_Points) == 3 ~ 10,
    rank(-Total_Points) == 1.5 ~ 25,
    rank(-Total_Points) == 2.5 ~ 15,
    rank(-Total_Points) == 3.5 ~ 5,
    TRUE ~ 0
  )) %>% suppressMessages() #fix ties next

DT::datatable(race_prizes,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))
Code
saveRDS(race_prizes, "data/race_prizes.rds")
Code
cumulative_score <- results |>
  filter(Player != "NA") |>
  group_by(Player) %>%
  summarize(Total_Score = sum(Total)) %>% 
  arrange(-Total_Score) %>%
  suppressMessages()

DT::datatable(cumulative_score,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))
Code
total_prize <- race_prizes %>%
  group_by(Player) %>%
  summarize(Total_Prize = sum(Prize)) %>%
  arrange(-Total_Prize) %>%
  suppressMessages()

DT::datatable(total_prize,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))

Points Summaries

Mean and 95% CI

Code
plot_data_draft <- results |>
  mutate(Draft_Pos = paste0(Draft_Round,".",Draft_Pos),
         Draft_Pos = ifelse(Draft_Round == "NA", "NA", Draft_Pos)) |>
  group_by(Draft_Pos) %>%
  summarize(mean = mean(Total),
            sd = sd(Total),
            min = min(Total),
            n = n(),
            max = max(Total)) %>%
  mutate(#Draft_Pos = factor(Draft_Pos,
         #                      levels = c(as.character(1:10),
         #                                 paste0("PB",1:5))),
         lb = mean - 1.96*sd/sqrt(n), #update later with error cis
         lb = ifelse(lb <= 0, 0, lb),
         ub = mean + 1.96*sd/sqrt(n)) 

p <- ggplot(plot_data_draft, 
       aes(x = Draft_Pos, y = mean, colour = Draft_Pos)) +
  geom_point() +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  ylab("Points")

ggplotly(p)
Code
plot_data_driver <- results %>%
  rowwise() %>%
  mutate(Points = sum(c(Race_Points, Qualifying, Fastest_Lap, Pole),
                      na.rm = TRUE)) %>%
  group_by(Driver) %>%
  summarize(mean = mean(Points),
            sd = sd(Points),
            min = min(Points),
            n = n(),
            max = max(Points)) %>%
  mutate(lb = mean - 1.96*sd/sqrt(n), #update later with error cis
         lb = ifelse(lb <= 0, 0, lb),
         ub = mean + 1.96*sd/sqrt(n)) 

p <- ggplot(plot_data_driver, 
       aes(x = Driver, y = mean, colour = Driver)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 45)) +
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  ylab("Points (No PB Bonus)")

ggplotly(p)

Model

[model description by chatGPT (to update), will run model and associated outputs following race 5]

The hierarchical model used in this analysis is structured as follows:

Let \(y_i\) represent the observed data for the \(i\)-th observation, and \(\mu_i\) represent the underlying true value for that observation.

For each observation \(i\) (ranging from 1 to \(N\)), \(\mu_i\) is assumed to follow a normal distribution with mean \(\mu_{\text{group}}[i]\), where \(\mu_{\text{group}}[i]\) is a group-specific mean corresponding to the group of the \(i\)-th observation. The variance of this normal distribution is \(\tau_{\text{group}}[i]\), representing the group-specific variance.

Additionally, the observed data \(y_i\) is assumed to follow a normal distribution with mean \(\mu_i\) and a common variance parameter \(\tau\).

The group-specific means \(\mu_{\text{group}}[i]\) are assumed to follow a normal distribution with mean \(\mu_{\text{prior}}\) and variance \(\tau_{\mu}\).

The hyperparameters \(\sigma\) and \(\sigma_{\mu}\) represent the standard deviations for the common variance \(\tau\) and the group-specific means’ variance \(\tau_{\mu}\) respectively.

The priors for \(\sigma\) and \(sigma_{\mu}\) are specified as uniform distributions between 0 and 100.

The prior for \(\mu_{\text{prior}}\) is assumed to be a normal distribution with mean 0 and a very small variance to reflect little prior information about the group means.

The hierarchical model aims to estimate the group-specific means and variances, as well as the common variance parameter, using the observed data and specified prior distributions.

Model summary statistics by player:

Code
#fit model
results_jags <- results %>%
  group_by(Grand_Prix, Player) %>%
  summarise(points = sum(Total)) %>%
  filter(Player != "NA") |>
  suppressMessages() 

jags_data <- list(
  player = as.integer(factor(results_jags$Player)),
  y = results_jags$points,
  N = length(results_jags$points),
  J = length(unique(results_jags$Player))
)

model_text <- "
model {
  for (i in 1:N) {
    mu[i] ~ dnorm(mu_group[player[i]], tau_group[player[i]]) # group-specific means
    y[i] ~ dnorm(mu[i], tau) #T(0, )                                 # likelihood
  }

  # Group priors
  for (j in 1:J) {
    mu_group[j] ~ dnorm(mu_prior, tau_mu) #T(0, )    # prior for group mean
    tau_group[j] ~ dgamma(0.001, 0.001)         # prior for group variance (precision)
  }

  # Priors
  tau <- pow(sigma, -2)
  sigma ~ dunif(0, 100)
  tau_mu <- pow(sigma_mu, -2)
  sigma_mu ~ dunif(0, 100)

  mu_prior ~ dnorm(0, 0.001)

}
"


model <- jags.model(textConnection(model_text),
                    data = jags_data,
                    n.chains = 1,
                    quiet = T,
                    inits = list(.RNG.seed = seed,
                                 .RNG.name = "base::Super-Duper"))
update(model, n.iter = 1e5)
mcmc <- coda.samples(model = model, variable.names = c("mu_group"), n.iter = 1e4)
samples <- lapply(mcmc, as_tibble) %>% bind_rows()

#plots
plot_samples <- samples %>%
  pivot_longer(cols = everything()) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         ))

mcmc_stats <- plot_samples %>% group_by(player) %>%
  summarize(
    mean = mean(value),
    sd = sd(value),
    lb = quantile(value, 0.025),
    ub = quantile(value, 0.975)
  ) %>%
  rename(Player = player)

obs_stats <- results_jags %>%
  group_by(Player) %>%
  summarize(mean_obs = mean(points),
            sd_obs = sd(points),
            n = n())

stats <- full_join(obs_stats, mcmc_stats, by = "Player") %>%
  select(
    Player,
    "Player Mean" = mean_obs,
    "Player SD" = sd_obs,
    "Model Mean" = mean,
    "Model SD" = sd,
    "95% CrI LB" = lb,
    "95% CrI UB" = ub
  )

plot_stats <- full_join(obs_stats, mcmc_stats, by = "Player")  %>%
  mutate(lb_obs = mean_obs - 1.96*sd_obs/sqrt(n),
         ub_obs = mean_obs + 1.96*sd_obs/sqrt(n)) %>%
  select(Player, mean_obs, lb_obs, ub_obs, mean, lb, ub)
plot_obs_stats <- plot_stats %>%
  select(Player, mean = mean_obs, lb = lb_obs, ub = ub_obs) %>%
  mutate(stat = "Observed")
plot_model_stats <- plot_stats %>%
  select(Player, mean, lb = lb, ub = ub) %>%
  mutate(stat = "Modeled")
plot_stats_full <- bind_rows(plot_obs_stats, plot_model_stats)

forest_plot <- ggplot(plot_stats_full,
                      aes(x = Player, y = mean, colour = stat)) +
  geom_point(position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = lb, ymax = ub),
                width = 0.1,
                position=position_dodge(width=0.5)) +
  geom_hline(yintercept = mean(obs_stats$mean_obs), linetype = "dashed") +
  coord_flip()

ranks <- samples %>%
  rowwise() %>%
  mutate(rank1 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[1],
         rank2 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[2],
         rank3 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[3],
         rank4 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[4],
         rank5 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[5],
         rank6 = rank(-c_across(`mu_group[1]`:`mu_group[6]`), ties.method = "first")[6]) %>%
  pivot_longer(rank1:rank6) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         )) %>%
  group_by(player) %>%
  summarize("1" = mean(value == 1),
            "2" = mean(value == 2),
            "3" = mean(value == 3),
            "4" = mean(value == 4),
            "5" = mean(value == 5),
            "6" = mean(value == 6)) %>%
  pivot_longer("1":"6") %>%
  rename(Position = name)

density_plot <- ggplot(plot_samples,
                       aes(x = value, colour = player, fill = player)) +
  geom_density(alpha = 0.5, adjust = 4)

rank_plot <- ggplot(ranks,
                    aes(x = Position, y = value, colour = Position, fill = Position)) +
  geom_bar(stat = "identity", alpha = 0.75) +
  facet_wrap(~player) +
  geom_text(aes(label = round(value, 2)), colour = "black", size = 4)

DT::datatable(stats,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf"))) %>%
  formatRound(2:7, 2)

Summary Plots

Code
overall_mean <- mean(race_prizes$Total_Points)

ggplot(data = race_prizes,
       aes(x = Total_Points, colour = Player, fill = Player)) +
  geom_histogram() +
  facet_wrap(~ Player) +
  geom_vline(xintercept = overall_mean, linetype = "dashed")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Code
forest_plot

Code
density_plot

Code
rank_plot

All Prizes

Code
best_bottom <- results |>
  filter(Draft_Round == "BB") |>
  group_by(Player) |>
  summarize(bottom_total = sum(Total))

final_prize <- total_prize %>%
  mutate(Most_Extreme = ifelse(Player == "Mitch", 20, 0),
         Most_Average = ifelse(Player == "Melissa", 20, 0),
         Best_Bottom = ifelse(Player == "Eric", 20, 0),
         Third_Place = ifelse(Player == "Eric", 50, 0),
         Second_Place = ifelse(Player == "Melissa", 100, 0),
         First_Place = ifelse(Player == "Akshata", 200, 0),
         Weekly_Prize = Total_Prize
  ) |> 
  select(-Total_Prize) %>% 
  mutate(
    Total_Prize = rowSums(select(., -Player))
  )

entry_cost <- sum(final_prize$Total_Prize)/nrow(final_prize)

final_prize <- final_prize %>%
  mutate(Entry_Cost = entry_cost + 13 + 2/3,
         Cumulative_Total = Total_Prize - Entry_Cost)

DT::datatable(final_prize,
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))

Final Prize

Code
DT::datatable(final_prize %>% select(c(Player, Total_Prize, Entry_Cost, Cumulative_Total)),
              filter = "top",
              extensions = "Buttons",
              options = list(buttons = c("csv", "pdf")))

Next Draft Order

Code
sample <- samples[sample(1:nrow(samples), 2),]
draft_order <- bind_rows(rank(sample[1,]), rank(sample[2,])) %>%
  mutate(rnd = 1:2) %>%
  pivot_longer(cols = starts_with("mu")) %>%
  arrange(rnd, value) %>%
  mutate(player_number = str_extract(name, "\\d+\\.?\\d*"),
         player = case_when(
           player_number == "1" ~ "Akshata",
           player_number == "2" ~ "Eric",
           player_number == "3" ~ "Forrest",
           player_number == "4" ~ "Kevin",
           player_number == "5" ~ "Melissa",
           player_number == "6" ~ "Mitch"
         )) %>%
  select(rnd, pick = value, player)
draft_order$draft_score <- 1:nrow(draft_order)

## temporary code for first five races
# players <- c("Akshata", "Eric", "Forrest", "Kevin", "Melissa", "Mitch")
# rnd1 <- sample(players, length(players), replace = FALSE)
# rnd2 <- sample(players, length(players), replace = FALSE)
# draft_order <- tibble(
#   rnd = rep(c(1, 2), each = length(players)),
#   pick = rep(1:length(players), 2),
#   player = c(rnd1, rnd2)
# )
# draft_order$draft_score <- 1:nrow(draft_order)

draft_pb <- draft_order %>%
  group_by(player) %>%
  summarize(draft_sum = sum(pick),
            draft_rnd1 = min(draft_score)) %>%
  rowwise() %>%
  arrange(-draft_sum, -draft_rnd1) %>%
  select(player) %>%
  ungroup() %>%
  mutate(rnd = 3,
         pick = row_number())

draft_order <- draft_order |> select(-draft_score)

draft_order_full <- bind_rows(
  draft_order, draft_pb
)

#uncomment this to display next year
#knitr::kable(draft_order_full)

saveRDS(draft_order_full, "draft_order.rds")

Reminder - can’t draft list:

Code
last_race <- results$Grand_Prix[nrow(results)]

cant_draft <- results %>%
  filter(Grand_Prix == last_race) %>%
  select(Player, "Can't Draft" = Driver) %>%
  arrange(Player) 

#uncomment this to display next year
#cant_draft %>% knitr::kable()

#cant_draft <- NULL #temporary for week 1
saveRDS(cant_draft, "cant_draft.rds")