Election Models Tracker

Author

Nick Warino

Published

October 18, 2024

1 Betting Market Odds

Show the code
library(rvest)
library(stringr)
library(tidyverse)
library(lubridate)

url <- "https://electionbettingodds.com/PresidentialParty2024.html"
page <- read_html(url)
script <- page %>% html_nodes("script") %>% html_text()
data_additions <- str_extract_all(script, "data\\.addRows\\((.*?)\\);")
data_content <- data_additions[[5]]
clean_data <- gsub("^.*?\\[|\\];\"$", "", data_content)
entries <- strsplit(clean_data, "\\],\\[")[[1]]

parse_entry <- function(entry) {
  # Extract date part
  date_str <- str_extract(entry, "(?<=new Date\\().*?(?=\\))")
  date_parts <- as.numeric(strsplit(date_str, ",")[[1]])
  
  # R months are 1-12, but JavaScript months are 0-11, so we add 1 to the month
  date_part <- as.Date(ISOdate(date_parts[1], date_parts[2] + 1, date_parts[3]))
  
  # Extract probabilities
  probs <- as.numeric(str_extract_all(entry, "\\d+\\.\\d+")[[1]])
  
  c(date_part, probs[1], probs[2])
}

parsed_data <- t(sapply(entries, parse_entry))

betting_market_odds <- as.data.frame(parsed_data, stringsAsFactors = FALSE)

remove(parsed_data, entries, data_content, data_additions, script, page, url, clean_data, parse_entry)

colnames(betting_market_odds) <- c("Date", "Dem", "GOP")

betting_market_odds$Date <- as.Date(betting_market_odds$Date, origin = "1970-01-01")
betting_market_odds$GOP_Bet <- as.numeric(as.character(betting_market_odds$GOP))
betting_market_odds$Dem_Bet <- as.numeric(as.character(betting_market_odds$Dem))

betting_market_odds <- betting_market_odds[order(betting_market_odds$Date), ]

# Keep only the last entry for each date
betting_market_odds <- betting_market_odds %>%
  group_by(Date) %>%
  slice_tail(n = 1) %>%
  ungroup()

# Change Date column to proper date format
betting_market_odds$Date <- as.Date(betting_market_odds$Date, origin = "1970-01-01")

# Keep only col 1 (Date) and 5 (Dem_Bet)
betting_market_odds <- betting_market_odds |> 
  select(1, 5)

2 Nate Silver

Show the code
# Biden
nate_biden <- read_csv("raw_data/nate_biden_trump.csv", show_col_types = FALSE)
nate_kamala <- read_csv("https://static.dwcdn.net/data/MPpof.csv", show_col_types = FALSE)

# Change column 1 name to Date
colnames(nate_biden)[1] <- "Date"
colnames(nate_kamala)[1] <- "Date"

# Combine dfs by rows
nate_silver <- bind_rows(nate_biden, nate_kamala)

# Convert Date column to proper date format
nate_silver <- nate_silver %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%Y"))

# Create new column ("Dem"), which takes value from "Biden" or "Harris" column (one is always blank)
nate_silver$Dem_Nate <- ifelse(is.na(nate_silver$Biden), nate_silver$Harris, nate_silver$Biden)

# Keep only col 1 (Date) and 5 (Dem_Nate)
nate_silver <- nate_silver %>%
  select(1, 6)


# # Optional: Add manual entry
# # Uncomment and modify the following lines when you need to add a manual entry
# manual_entry <- data.frame(Date = as.Date("2024-10-17"), Dem_Nate = 50.3)
# nate_silver <- bind_rows(nate_silver, manual_entry) %>% arrange(Date)
# 
# # Display the last few rows to verify
# tail(nate_silver)

3 Metaculus

Show the code
# Load necessary libraries
# Load necessary libraries
library(jsonlite)

# Set the JSON URL
url <- "https://www.metaculus.com/api2/questions/20772/?format=json"

# Fetch and parse the JSON data
tryCatch({
  data <- fromJSON(url)
  
  # Extract the relevant information
  dem_prob <- data$community_prediction
  
  # Print the result
  cat("Democratic party's probability of winning: ", dem_prob, "\n")
  
  # Extract additional relevant information
  cat("Question title: ", data$title, "\n")
  cat("Close time: ", data$close_time, "\n")
  
  # Extract prediction timeseries data
  prediction_timeseries <- data$prediction_timeseries
  
  # Print the latest few predictions
  cat("\nLatest few predictions:\n")
  print(tail(prediction_timeseries))

}, error = function(e) {
  cat("An error occurred:", conditionMessage(e), "\n")
})
Democratic party's probability of winning:  
Question title:  Which party will win the 2024 US presidential election? 
Close time:  

Latest few predictions:
NULL
Show the code
# Assuming the JSON data is loaded into a variable called 'data'
metaculus <- data$question$aggregations$recency_weighted$history

# Convert timestamps to dates
metaculus <- metaculus %>%
  mutate(
    start_date = as.POSIXct(start_time, origin = "1970-01-01", tz = "UTC"),
    end_date = as.POSIXct(end_time, origin = "1970-01-01", tz = "UTC")
  )

# Extract probabilities from the 'means' column
# Assuming the order is Democratic, Republican, Libertarian, Green, Other
metaculus <- metaculus %>%
  mutate(
    dem_prob = map_dbl(means, ~.x[1]),
    rep_prob = map_dbl(means, ~.x[2]),
    lib_prob = map_dbl(means, ~.x[3]),
    green_prob = map_dbl(means, ~.x[4]),
    other_prob = map_dbl(means, ~.x[5])
  )

# Select and rename relevant columns
metaculus <- metaculus %>%
  select(
    start_date,
    end_date,
    forecaster_count,
    dem_prob,
    rep_prob,
    lib_prob,
    green_prob,
    other_prob
  )


# Filter col 1 to "community_prediction", keep col 3 (rename Date), 4 (rename Dem), 5 (rename GOP)
metaculus <- metaculus %>%
  select(1, 4, 5) %>%
  rename(Date = 1, Dem_Metacalus = 2, GOP_Metacalus = 3)

# Data column includes multiple entries for every day, sorted to minute. Keep only last entry for each day, then display only YYYY-MM-DD
metaculus$Date <- as.Date(metaculus$Date, origin = "1970-01-01")
metaculus_clean <- metaculus %>%
  group_by(Date) %>%
  slice_tail(n = 1) %>%
  ungroup() |> # multiple by 100
  mutate(Dem_Metacalus = Dem_Metacalus * 100, GOP_Metacalus = GOP_Metacalus * 100) |> 
  select(-GOP_Metacalus)

4 538

Not doing Biden model since GEM admitted (barely) it was broken. Will only include Harris model.

Show the code
# Download and unzip data from this link and save in raw_data folder https://projects.fivethirtyeight.com/2024-general-data/538_2024_election_forecast_data.zip

# Load necessary library
if (!require(utils)) install.packages("utils")

# Set the URL and destination
url <- "https://projects.fivethirtyeight.com/2024-general-data/538_2024_election_forecast_data.zip"
fivethirtyeight_data <- "raw_data/538_2024_election_forecast_data.zip"
dest_folder <- "raw_data"

# Download the file
download.file(url, fivethirtyeight_data, mode = "wb")

# Unzip the file
unzip(fivethirtyeight_data, exdir = dest_folder)

# Optionally, remove the zip file after extraction
file.remove(fivethirtyeight_data)
[1] TRUE
Show the code
# Load "raw_data/538_2024_election_forecast_data/daily_summary.csv"
fivethirtyeight_data <- read_csv("raw_data/538_2024_election_forecast_data/daily_summary.csv", show_col_types = FALSE)

# Filter variable column to only "electoral college" and party column to "DEM"
fivethirtyeight_data <- fivethirtyeight_data %>%
  filter(variable == "electoral college", party == "DEM", metric == "p_win") %>%
  select(1, 7) |> 
  rename(Date = 1,
         Dem_538 = 2) |> # multiple by 100
  mutate(Dem_538 = Dem_538 * 100)

5 Economist

Show the code
# Create sequence of dates
dates <- seq(as.Date("2024-06-01"), Sys.Date(), by = "day")

# Estimate Democratic probability values day by day
# These values are estimated by closely examining the graph
dem_prob <- c(
  # June
  40, # 1st
  40, # 2nd
  38, # 3rd
  37, # 4th
  38, # 5th
  40, # 6th
  42, # 7th
  41, # 8th
  41, # 9th
  40, # 10th
  41, # 11th
  40, # 12th
  40, # 13th
  37, # 14th
  36, # 15th
  36, # 16th
  35, # 17th
  38, # 18th
  37, # 19th
  37, # 20th
  40, # 21st
  39, # 22nd
  40, # 23rd
  39, # 24th
  38, # 25th
  37, # 26th
  37, # 27th
  36, # 28th
  35, # 29th
  32, # 30th
  
  # July
  31, # 1st
  32, # 2nd
  32, # 3rd
  30, # 4th
  31, # 5th
  28, # 6th
  33, # 7th
  33, # 8th
  32, # 9th
  33, # 10th
  32, # 11th
  33, # 12th
  32, # 13th
  33, # 14th
  31, # 15th
  30, # 16th
  31, # 17th
  32, # 18th
  24, # 19th
  25, # 20th
  24, # 21st
  24, # 22nd
  24, # 23rd
  24, # 24th
  24, # 25th
  24, # 26th
  24, # 27th
  24, # 28th
  24, # 29th
  24, # 30th
  24, # 31st
  
  # August
  43, # 1st
  45, # 2nd
  45, # 3rd
  45, # 4th
  47, # 5th
  48, # 6th
  48, # 7th
  48, # 8th
  48, # 9th
  50, # 10th
  50, # 11th
  52, # 12th
  52, # 13th
  52, # 14th
  52, # 15th
  49, # 16th
  49, # 17th
  49, # 18th
  52, # 19th
  51, # 20th
  51, # 21st
  52, # 22nd
  55, # 23rd
  55, # 24th
  55, # 25th
  55, # 26th
  54, # 27th
  54, # 28th
  56, # 29th
  54, # 30th
  53, # 31st
  
  # September
  53, # 1st
  52, # 2nd
  54, # 3rd
  54, # 4th
  53, # 5th
  52, # 6th
  50, # 7th
  52, # 8th
  52, # 9th
  51, # 10th
  53, # 11th
  51, # 12th
  51, # 13th
  52, # 14th
  53, # 15th
  52, # 16th
  56, # 17th
  57, # 18th
  59, # 19TH
  57, # 20TH
  56, # 21ST
  57, # 22ND
  57, # 23RD
  53, # 24TH
  55, # 25TH
  54, # 26TH
  58, # 27TH
  57, # 28TH
  57, # 29TH
  57, # 30TH
  
# October

  55, # 1st
  56, # 2nd
  55, # 3rd
  54, # 4th
  53, # 5th
  53, # 6th
  53, # 7th
  54, # 8th
  54, # 9th
  51, # 10th
  51, # 11th
  51, # 12th
  51, # 13th
  52, # 14th
  54, # 15th
  54, # 16th
  54, # 17th
  54
)

# Create the dataframe
data_economist <- data.frame(
  Date = dates,
  Dem_Econ = dem_prob
)

# Graph

6 Combine

Show the code
# Combine all data (betting_market_odds, nate_silver, metaculus_clean, fivethirtyeight_data) by Date
all_data <- betting_market_odds %>%
  full_join(nate_silver, by = "Date") %>%
  full_join(metaculus_clean, by = "Date") %>%
  full_join(fivethirtyeight_data, by = "Date") |>
  full_join(data_economist, by = "Date") |>  arrange(Date)

7 Graph

Show the code
library(dplyr)
library(tidyr)

# Function to fill NA with last non-NA value
fill_na_with_last <- function(x) {
  last_valid <- NA
  for (i in seq_along(x)) {
    if (!is.na(x[i])) {
      last_valid <- x[i]
    } else {
      x[i] <- last_valid
    }
  }
  return(x)
}

# Apply the function to each source column and calculate the average
all_data <- all_data %>%
  arrange(Date) %>%
  mutate(across(c(Dem_Bet, Dem_Nate, Dem_Metacalus, Dem_538, Dem_Econ), fill_na_with_last)) %>%
  mutate(Dem_Avg = rowMeans(select(., Dem_Bet, Dem_Nate, Dem_Metacalus, Dem_538, Dem_Econ), na.rm = TRUE))

# Select the desired columns
all_data_select <- all_data %>%
  select(Date, Dem_Bet, Dem_Nate, Dem_Metacalus, Dem_538, Dem_Econ, Dem_Avg)

# Save as csv file with today's date in file name
write_csv(all_data_select, paste0("output_data/all_data_", Sys.Date(), ".csv"))

# Graph all 3 plus the avg on a faceted plot
all_data_long <- all_data_select %>%
  pivot_longer(cols = -Date, names_to = "Source", values_to = "Dem")

# Rename the sources
all_data_long$Source <- recode(all_data_long$Source, 
                                "Dem_Bet" = "Betting Market Average",
                                "Dem_Nate" = "Nate Silver",
                                "Dem_Metacalus" = "Metaculus",
                                "Dem_538" = "538",
                                "Dem_Econ" = "The Economist",
                                "Dem_Avg" = "Average of All Sources")

# First, modify the all_data_long dataframe
all_data_long <- all_data_long %>%
  mutate(Source = factor(Source, levels = c("Average of All Sources", "Betting Market Average", "Metaculus", "Nate Silver", "The Economist", "538")))


# Debugging: Print the data to check latest values
# Identify the latest date for each source
latest_values <- all_data_long %>%
  group_by(Source) %>%
  slice_max(order_by = Date, n = 1) |> # round to 0
  mutate(Dem = round(Dem, 0))

latest_values2 <- all_data_long %>%
  filter(Source == "Average of All Sources") %>% 
  slice_max(order_by = Date, n = 1) |> # round to 0
  mutate(Dem = round(Dem, 0))

p1 <- ggplot(all_data_long, aes(x = Date, y = Dem)) +
  geom_line() +
  labs(title = "Democratic Win Probability for President, 2021-2024",
       subtitle = paste("Data from Election Betting Odds, Nate Silver, 538, The Economist, and Metaculus. Then averaged. Last updated:", Sys.Date()),
       x = "Date",
       y = "Probability",
       caption = "Nick Warino. nickwarino.com.") +
  facet_wrap(~Source, nrow = 3, ncol = 2) +  # 2x3 grid faceted plot
  My_Theme_WithY() + # hide legend
  theme(legend.position = "none") +
  ylim(0, 100) + # add horizontal line at 50
  xlim(as.Date("2021-01-01"), as.Date("2024-11-30")) +
  geom_hline(yintercept = 50, linetype = "dashed", color = "red") +
  geom_text(data = latest_values, 
            aes(label = round(Dem, 1)), 
            hjust = 0, vjust = 1, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold")  +
  theme(
    panel.spacing = unit(1, "lines"),
    strip.background = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, size = .5)
  )


# Now plot just the average
p2 <- all_data_long |> filter(Source == "Average of All Sources") |>
ggplot(aes(x = Date, y = Dem)) +
  geom_line(size=2) +
  labs(title = "Democratic Win Probability for President, 2021-2024",
       subtitle = paste("Data from Election Betting Odds, Nate Silver, 538, The Economist, and Metaculus. Then averaged. Last updated:", Sys.Date()),
       x = "Date",
       y = "Probability",
       caption = "Nick Warino. nickwarino.com.") +
  geom_hline(yintercept = 50, linetype = "dashed", color = "red") +
  My_Theme_WithY() + # hide legend
  theme(legend.position = "none") +
  ylim(0, 100) + # add horizontal line at 50
  xlim(as.Date("2021-01-01"), as.Date("2024-11-30")) +
  geom_text(data = latest_values2, 
            aes(label = round(Dem, 1)), 
            hjust = 0, vjust = 1, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") +
  theme(
    panel.spacing = unit(1, "lines"),
    strip.background = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, size = .5)
  )

# Now just 2024
p3 <- all_data_long %>% 
  filter(Date > "2023-12-31") %>%
  ggplot(aes(x = Date, y = Dem)) +
  geom_line() +
  labs(title = "Democratic Win Probability for President, 2024",
       subtitle = paste("Data from Election Betting Odds, Nate Silver, 538, The Economist, and Metaculus. Then averaged. Last updated:", Sys.Date()),
       x = "Date",
       y = "Probability",
       caption = "Nick Warino. nickwarino.com.") +
  facet_wrap(~Source, nrow = 3, ncol = 2) +  # 2x3 grid faceted plot
  My_Theme_WithY() + # hide legend
  theme(legend.position = "none") +
  ylim(0, 100) + # add horizontal line at 50
  xlim(as.Date("2024-01-01"), as.Date("2024-11-30")) +
  geom_hline(yintercept = 50, linetype = "dashed", color = "red") +
  geom_text(data = latest_values, 
            aes(label = round(Dem, 1)), 
            hjust = 0, vjust = 1, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") +
  geom_vline(xintercept = as.Date("2024-06-27"), linetype = "dashed", color = "grey") +
  geom_vline(xintercept = as.Date("2024-07-21"), linetype = "dashed", color = "grey") +
  geom_vline(xintercept = as.Date("2024-09-10"), linetype = "dashed", color = "grey") +
  geom_vline(xintercept = as.Date("2024-11-05"), linetype = "dashed", color = "grey") +
  annotate("text", x = as.Date("2024-06-27"), y = 60, label = "Biden-\nTrump\nDebate", hjust = 1, vjust = 0, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") +
  annotate("text", x = as.Date("2024-07-21"), y = 60, label = "Biden\nDrops\nOut", hjust = -0.1, vjust = 0, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") +
  annotate("text", x = as.Date("2024-09-10"), y = 20, label = "Harris-\nTrump\nDebate", hjust = 1, vjust = 0, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold")  +
  annotate("text", x = as.Date("2024-11-05"), y = 60, label = "E-Day", hjust = -0.1, vjust = 0, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") +
  theme(
    panel.spacing = unit(.5, "lines"),
    strip.background = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, size = .5)
  )

# Assuming all_data_long and latest_values are your data frames
p4 <- all_data_long %>% 
  filter(Source == "Average of All Sources") %>% 
  filter(Date > "2023-12-31") %>%
  ggplot(aes(x = Date, y = Dem)) +
  geom_line(size=2) +
  labs(title = "Democratic Win Probability for President, 2024",
       subtitle = paste("Data from Election Betting Odds, Nate Silver, 538, The Economist, and Metaculus. Then averaged. Last updated:", Sys.Date()),
       x = "Date",
       y = "Probability",
       caption = "Nick Warino. nickwarino.com.") +
  geom_hline(yintercept = 50, linetype = "dashed", color = "red") +
  ylim(0, 100) + # change x axis to end at Nov 5, 2024
  xlim(as.Date("2024-01-01"), as.Date("2024-11-30")) +
  geom_text(data = latest_values2, 
            aes(label = round(Dem, 1)), 
            hjust = 0, vjust = 1, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") + 
  geom_vline(xintercept = as.Date("2024-06-27"), linetype = "dashed", color = "grey") +
  geom_vline(xintercept = as.Date("2024-07-21"), linetype = "dashed", color = "grey") +
  geom_vline(xintercept = as.Date("2024-09-10"), linetype = "dashed", color = "grey") +
  geom_vline(xintercept = as.Date("2024-11-05"), linetype = "dashed", color = "grey") +
  annotate("text", x = as.Date("2024-06-27"), y = 60, label = "Biden-\nTrump\nDebate", hjust = -0.1, vjust = 0, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") +
  annotate("text", x = as.Date("2024-07-21"), y = 60, label = "Biden\nDrops\nOut", hjust = -0.1, vjust = 0, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") +
    annotate("text", x = as.Date("2024-09-10"), y = 60, label = "Harris-\nTrump\nDebate", hjust = 0.01, vjust = 0, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold")  +
  annotate("text", x = as.Date("2024-11-05"), y = 60, label = "E-Day", hjust = -0.1, vjust = 0, size = 4, family = "Avenir Next Condensed", color = "black", fontface = "bold") +
  My_Theme_WithY()

# Save all 4 to /Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images and /Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images
ggsave("/Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images/2021-24_all_sources.png", p1, width = 10, height = 10, units = "in", dpi = 300)
ggsave("/Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images/2021-24_average.png", p2, width = 10, height = 10, units = "in", dpi = 300)
ggsave("/Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images/2024_all_sources.png", p3, width = 10, height = 10, units = "in", dpi = 300)
ggsave("/Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images/2024_average.png", p4, width = 10, height = 10, units = "in", dpi = 300)
ggsave("/Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images/2021-24_all_sources.png", p1, width = 10, height = 10, units = "in", dpi = 300)
ggsave("/Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images/2021-24_average.png", p2, width = 10, height = 10, units = "in", dpi = 300)
ggsave("/Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images/2024_all_sources.png", p3, width = 10, height = 10, units = "in", dpi = 300)
ggsave("/Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images/2024_average.png", p4, width = 10, height = 10, units = "in", dpi = 300)
Show the code
p1

Show the code
p3

Show the code
p2

Show the code
p4