library(rvest)library(stringr)library(tidyverse)library(lubridate)url <-"https://electionbettingodds.com/PresidentialParty2024.html"page <-read_html(url)script <- page %>%html_nodes("script") %>%html_text()data_additions <-str_extract_all(script, "data\\.addRows\\((.*?)\\);")data_content <- data_additions[[5]]clean_data <-gsub("^.*?\\[|\\];\"$", "", data_content)entries <-strsplit(clean_data, "\\],\\[")[[1]]parse_entry <-function(entry) {# Extract date part date_str <-str_extract(entry, "(?<=new Date\\().*?(?=\\))") date_parts <-as.numeric(strsplit(date_str, ",")[[1]])# R months are 1-12, but JavaScript months are 0-11, so we add 1 to the month date_part <-as.Date(ISOdate(date_parts[1], date_parts[2] +1, date_parts[3]))# Extract probabilities probs <-as.numeric(str_extract_all(entry, "\\d+\\.\\d+")[[1]])c(date_part, probs[1], probs[2])}parsed_data <-t(sapply(entries, parse_entry))betting_market_odds <-as.data.frame(parsed_data, stringsAsFactors =FALSE)remove(parsed_data, entries, data_content, data_additions, script, page, url, clean_data, parse_entry)colnames(betting_market_odds) <-c("Date", "Dem", "GOP")betting_market_odds$Date <-as.Date(betting_market_odds$Date, origin ="1970-01-01")betting_market_odds$GOP_Bet <-as.numeric(as.character(betting_market_odds$GOP))betting_market_odds$Dem_Bet <-as.numeric(as.character(betting_market_odds$Dem))betting_market_odds <- betting_market_odds[order(betting_market_odds$Date), ]# Keep only the last entry for each datebetting_market_odds <- betting_market_odds %>%group_by(Date) %>%slice_tail(n =1) %>%ungroup()# Change Date column to proper date formatbetting_market_odds$Date <-as.Date(betting_market_odds$Date, origin ="1970-01-01")# Keep only col 1 (Date) and 5 (Dem_Bet)betting_market_odds <- betting_market_odds |>select(1, 5)
2 Nate Silver
Show the code
# Bidennate_biden <-read_csv("raw_data/nate_biden_trump.csv", show_col_types =FALSE)nate_kamala <-read_csv("https://static.dwcdn.net/data/MPpof.csv", show_col_types =FALSE)# Change column 1 name to Datecolnames(nate_biden)[1] <-"Date"colnames(nate_kamala)[1] <-"Date"# Combine dfs by rowsnate_silver <-bind_rows(nate_biden, nate_kamala)# Convert Date column to proper date formatnate_silver <- nate_silver %>%mutate(Date =as.Date(Date, format ="%m/%d/%Y"))# Create new column ("Dem"), which takes value from "Biden" or "Harris" column (one is always blank)nate_silver$Dem_Nate <-ifelse(is.na(nate_silver$Biden), nate_silver$Harris, nate_silver$Biden)# Keep only col 1 (Date) and 5 (Dem_Nate)nate_silver <- nate_silver %>%select(1, 6)# # Optional: Add manual entry# # Uncomment and modify the following lines when you need to add a manual entry# manual_entry <- data.frame(Date = as.Date("2024-10-17"), Dem_Nate = 50.3)# nate_silver <- bind_rows(nate_silver, manual_entry) %>% arrange(Date)# # # Display the last few rows to verify# tail(nate_silver)
3 Metaculus
Show the code
# Load necessary libraries# Load necessary librarieslibrary(jsonlite)# Set the JSON URLurl <-"https://www.metaculus.com/api2/questions/20772/?format=json"# Fetch and parse the JSON datatryCatch({ data <-fromJSON(url)# Extract the relevant information dem_prob <- data$community_prediction# Print the resultcat("Democratic party's probability of winning: ", dem_prob, "\n")# Extract additional relevant informationcat("Question title: ", data$title, "\n")cat("Close time: ", data$close_time, "\n")# Extract prediction timeseries data prediction_timeseries <- data$prediction_timeseries# Print the latest few predictionscat("\nLatest few predictions:\n")print(tail(prediction_timeseries))}, error =function(e) {cat("An error occurred:", conditionMessage(e), "\n")})
Democratic party's probability of winning:
Question title: Which party will win the 2024 US presidential election?
Close time:
Latest few predictions:
NULL
Show the code
# Assuming the JSON data is loaded into a variable called 'data'metaculus <- data$question$aggregations$recency_weighted$history# Convert timestamps to datesmetaculus <- metaculus %>%mutate(start_date =as.POSIXct(start_time, origin ="1970-01-01", tz ="UTC"),end_date =as.POSIXct(end_time, origin ="1970-01-01", tz ="UTC") )# Extract probabilities from the 'means' column# Assuming the order is Democratic, Republican, Libertarian, Green, Othermetaculus <- metaculus %>%mutate(dem_prob =map_dbl(means, ~.x[1]),rep_prob =map_dbl(means, ~.x[2]),lib_prob =map_dbl(means, ~.x[3]),green_prob =map_dbl(means, ~.x[4]),other_prob =map_dbl(means, ~.x[5]) )# Select and rename relevant columnsmetaculus <- metaculus %>%select( start_date, end_date, forecaster_count, dem_prob, rep_prob, lib_prob, green_prob, other_prob )# Filter col 1 to "community_prediction", keep col 3 (rename Date), 4 (rename Dem), 5 (rename GOP)metaculus <- metaculus %>%select(1, 4, 5) %>%rename(Date =1, Dem_Metacalus =2, GOP_Metacalus =3)# Data column includes multiple entries for every day, sorted to minute. Keep only last entry for each day, then display only YYYY-MM-DDmetaculus$Date <-as.Date(metaculus$Date, origin ="1970-01-01")metaculus_clean <- metaculus %>%group_by(Date) %>%slice_tail(n =1) %>%ungroup() |># multiple by 100mutate(Dem_Metacalus = Dem_Metacalus *100, GOP_Metacalus = GOP_Metacalus *100) |>select(-GOP_Metacalus)
4 538
Not doing Biden model since GEM admitted (barely) it was broken. Will only include Harris model.
Show the code
# Download and unzip data from this link and save in raw_data folder https://projects.fivethirtyeight.com/2024-general-data/538_2024_election_forecast_data.zip# Load necessary libraryif (!require(utils)) install.packages("utils")# Set the URL and destinationurl <-"https://projects.fivethirtyeight.com/2024-general-data/538_2024_election_forecast_data.zip"fivethirtyeight_data <-"raw_data/538_2024_election_forecast_data.zip"dest_folder <-"raw_data"# Download the filedownload.file(url, fivethirtyeight_data, mode ="wb")# Unzip the fileunzip(fivethirtyeight_data, exdir = dest_folder)# Optionally, remove the zip file after extractionfile.remove(fivethirtyeight_data)
[1] TRUE
Show the code
# Load "raw_data/538_2024_election_forecast_data/daily_summary.csv"fivethirtyeight_data <-read_csv("raw_data/538_2024_election_forecast_data/daily_summary.csv", show_col_types =FALSE)# Filter variable column to only "electoral college" and party column to "DEM"fivethirtyeight_data <- fivethirtyeight_data %>%filter(variable =="electoral college", party =="DEM", metric =="p_win") %>%select(1, 7) |>rename(Date =1,Dem_538 =2) |># multiple by 100mutate(Dem_538 = Dem_538 *100)
# Combine all data (betting_market_odds, nate_silver, metaculus_clean, fivethirtyeight_data) by Dateall_data <- betting_market_odds %>%full_join(nate_silver, by ="Date") %>%full_join(metaculus_clean, by ="Date") %>%full_join(fivethirtyeight_data, by ="Date") |>full_join(data_economist, by ="Date") |>arrange(Date)
7 Graph
Show the code
library(dplyr)library(tidyr)# Function to fill NA with last non-NA valuefill_na_with_last <-function(x) { last_valid <-NAfor (i inseq_along(x)) {if (!is.na(x[i])) { last_valid <- x[i] } else { x[i] <- last_valid } }return(x)}# Apply the function to each source column and calculate the averageall_data <- all_data %>%arrange(Date) %>%mutate(across(c(Dem_Bet, Dem_Nate, Dem_Metacalus, Dem_538, Dem_Econ), fill_na_with_last)) %>%mutate(Dem_Avg =rowMeans(select(., Dem_Bet, Dem_Nate, Dem_Metacalus, Dem_538, Dem_Econ), na.rm =TRUE))# Select the desired columnsall_data_select <- all_data %>%select(Date, Dem_Bet, Dem_Nate, Dem_Metacalus, Dem_538, Dem_Econ, Dem_Avg)# Save as csv file with today's date in file namewrite_csv(all_data_select, paste0("output_data/all_data_", Sys.Date(), ".csv"))# Graph all 3 plus the avg on a faceted plotall_data_long <- all_data_select %>%pivot_longer(cols =-Date, names_to ="Source", values_to ="Dem")# Rename the sourcesall_data_long$Source <-recode(all_data_long$Source, "Dem_Bet"="Betting Market Average","Dem_Nate"="Nate Silver","Dem_Metacalus"="Metaculus","Dem_538"="538","Dem_Econ"="The Economist","Dem_Avg"="Average of All Sources")# First, modify the all_data_long dataframeall_data_long <- all_data_long %>%mutate(Source =factor(Source, levels =c("Average of All Sources", "Betting Market Average", "Metaculus", "Nate Silver", "The Economist", "538")))# Debugging: Print the data to check latest values# Identify the latest date for each sourcelatest_values <- all_data_long %>%group_by(Source) %>%slice_max(order_by = Date, n =1) |># round to 0mutate(Dem =round(Dem, 0))latest_values2 <- all_data_long %>%filter(Source =="Average of All Sources") %>%slice_max(order_by = Date, n =1) |># round to 0mutate(Dem =round(Dem, 0))p1 <-ggplot(all_data_long, aes(x = Date, y = Dem)) +geom_line() +labs(title ="Democratic Win Probability for President, 2021-2024",subtitle =paste("Data from Election Betting Odds, Nate Silver, 538, The Economist, and Metaculus. Then averaged. Last updated:", Sys.Date()),x ="Date",y ="Probability",caption ="Nick Warino. nickwarino.com.") +facet_wrap(~Source, nrow =3, ncol =2) +# 2x3 grid faceted plotMy_Theme_WithY() +# hide legendtheme(legend.position ="none") +ylim(0, 100) +# add horizontal line at 50xlim(as.Date("2021-01-01"), as.Date("2024-11-30")) +geom_hline(yintercept =50, linetype ="dashed", color ="red") +geom_text(data = latest_values, aes(label =round(Dem, 1)), hjust =0, vjust =1, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +theme(panel.spacing =unit(1, "lines"),strip.background =element_blank(),panel.border =element_rect(color ="black", fill =NA, size = .5) )# Now plot just the averagep2 <- all_data_long |>filter(Source =="Average of All Sources") |>ggplot(aes(x = Date, y = Dem)) +geom_line(size=2) +labs(title ="Democratic Win Probability for President, 2021-2024",subtitle =paste("Data from Election Betting Odds, Nate Silver, 538, The Economist, and Metaculus. Then averaged. Last updated:", Sys.Date()),x ="Date",y ="Probability",caption ="Nick Warino. nickwarino.com.") +geom_hline(yintercept =50, linetype ="dashed", color ="red") +My_Theme_WithY() +# hide legendtheme(legend.position ="none") +ylim(0, 100) +# add horizontal line at 50xlim(as.Date("2021-01-01"), as.Date("2024-11-30")) +geom_text(data = latest_values2, aes(label =round(Dem, 1)), hjust =0, vjust =1, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +theme(panel.spacing =unit(1, "lines"),strip.background =element_blank(),panel.border =element_rect(color ="black", fill =NA, size = .5) )# Now just 2024p3 <- all_data_long %>%filter(Date >"2023-12-31") %>%ggplot(aes(x = Date, y = Dem)) +geom_line() +labs(title ="Democratic Win Probability for President, 2024",subtitle =paste("Data from Election Betting Odds, Nate Silver, 538, The Economist, and Metaculus. Then averaged. Last updated:", Sys.Date()),x ="Date",y ="Probability",caption ="Nick Warino. nickwarino.com.") +facet_wrap(~Source, nrow =3, ncol =2) +# 2x3 grid faceted plotMy_Theme_WithY() +# hide legendtheme(legend.position ="none") +ylim(0, 100) +# add horizontal line at 50xlim(as.Date("2024-01-01"), as.Date("2024-11-30")) +geom_hline(yintercept =50, linetype ="dashed", color ="red") +geom_text(data = latest_values, aes(label =round(Dem, 1)), hjust =0, vjust =1, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +geom_vline(xintercept =as.Date("2024-06-27"), linetype ="dashed", color ="grey") +geom_vline(xintercept =as.Date("2024-07-21"), linetype ="dashed", color ="grey") +geom_vline(xintercept =as.Date("2024-09-10"), linetype ="dashed", color ="grey") +geom_vline(xintercept =as.Date("2024-11-05"), linetype ="dashed", color ="grey") +annotate("text", x =as.Date("2024-06-27"), y =60, label ="Biden-\nTrump\nDebate", hjust =1, vjust =0, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +annotate("text", x =as.Date("2024-07-21"), y =60, label ="Biden\nDrops\nOut", hjust =-0.1, vjust =0, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +annotate("text", x =as.Date("2024-09-10"), y =20, label ="Harris-\nTrump\nDebate", hjust =1, vjust =0, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +annotate("text", x =as.Date("2024-11-05"), y =60, label ="E-Day", hjust =-0.1, vjust =0, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +theme(panel.spacing =unit(.5, "lines"),strip.background =element_blank(),panel.border =element_rect(color ="black", fill =NA, size = .5) )# Assuming all_data_long and latest_values are your data framesp4 <- all_data_long %>%filter(Source =="Average of All Sources") %>%filter(Date >"2023-12-31") %>%ggplot(aes(x = Date, y = Dem)) +geom_line(size=2) +labs(title ="Democratic Win Probability for President, 2024",subtitle =paste("Data from Election Betting Odds, Nate Silver, 538, The Economist, and Metaculus. Then averaged. Last updated:", Sys.Date()),x ="Date",y ="Probability",caption ="Nick Warino. nickwarino.com.") +geom_hline(yintercept =50, linetype ="dashed", color ="red") +ylim(0, 100) +# change x axis to end at Nov 5, 2024xlim(as.Date("2024-01-01"), as.Date("2024-11-30")) +geom_text(data = latest_values2, aes(label =round(Dem, 1)), hjust =0, vjust =1, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +geom_vline(xintercept =as.Date("2024-06-27"), linetype ="dashed", color ="grey") +geom_vline(xintercept =as.Date("2024-07-21"), linetype ="dashed", color ="grey") +geom_vline(xintercept =as.Date("2024-09-10"), linetype ="dashed", color ="grey") +geom_vline(xintercept =as.Date("2024-11-05"), linetype ="dashed", color ="grey") +annotate("text", x =as.Date("2024-06-27"), y =60, label ="Biden-\nTrump\nDebate", hjust =-0.1, vjust =0, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +annotate("text", x =as.Date("2024-07-21"), y =60, label ="Biden\nDrops\nOut", hjust =-0.1, vjust =0, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +annotate("text", x =as.Date("2024-09-10"), y =60, label ="Harris-\nTrump\nDebate", hjust =0.01, vjust =0, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +annotate("text", x =as.Date("2024-11-05"), y =60, label ="E-Day", hjust =-0.1, vjust =0, size =4, family ="Avenir Next Condensed", color ="black", fontface ="bold") +My_Theme_WithY()# Save all 4 to /Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images and /Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/imagesggsave("/Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images/2021-24_all_sources.png", p1, width =10, height =10, units ="in", dpi =300)ggsave("/Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images/2021-24_average.png", p2, width =10, height =10, units ="in", dpi =300)ggsave("/Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images/2024_all_sources.png", p3, width =10, height =10, units ="in", dpi =300)ggsave("/Users/nickmac/Dropbox/Projects/Side/election_models_tracker/images/2024_average.png", p4, width =10, height =10, units ="in", dpi =300)ggsave("/Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images/2021-24_all_sources.png", p1, width =10, height =10, units ="in", dpi =300)ggsave("/Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images/2021-24_average.png", p2, width =10, height =10, units ="in", dpi =300)ggsave("/Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images/2024_all_sources.png", p3, width =10, height =10, units ="in", dpi =300)ggsave("/Users/nickmac/Dropbox/Projects/SEIU/Legislative Update/images/2024_average.png", p4, width =10, height =10, units ="in", dpi =300)