Sports and Election Analyses

The graph below shows the tendency to pass by NFL Teams, in 2024, during their plays. If you’re. a football fan it might be interesting to you that the highest pass rate being the Arizona Cardinals at 60.1% pass rate and 298 plays, and the lowest being the New York Jets with a 40.6% pass rate and 234 plays.

If you’re a baseball fan, the plot below displays the top 10 MLB home run hitters, in 2025. Cal Raleigh, of the Seattle Mariners, is at the top with 60 home run hits, and number 10 is Nick Kurtz, of the Athletics, with around 36 home run hits.

Code:

############################################################
## 1. Check for Required Packages and Install if Missing
############################################################

# Check for Lahman (historical MLB data)
if (!requireNamespace("Lahman", quietly = TRUE)) {
  install.packages("Lahman")
}

# Check for tidyverse
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}


############################################################
## 2. Load Required Packages
############################################################

library(Lahman)
library(tidyverse)


############################################################
## 3. Define the Season of Interest
############################################################

# Most recently completed MLB season
season_year <- 2025


############################################################
## 4. Pull Season-Level Batting Data (Lahman)
############################################################

# The Batting table contains one row per player per season per team
batting_data <- Batting %>%
  filter(yearID == season_year)


############################################################
## 5. Inspect and Filter the Data
############################################################

# Calculate plate appearances (AB + BB + HBP + SF)
qualified_hitters <- batting_data %>%
  mutate(
    PA = AB + BB + HBP + SF
  ) %>%
  filter(PA >= 400)


############################################################
## 6. Summarize: Identify the Top Home Run Hitters
############################################################

# Players may appear multiple times if traded; combine them
hr_leaders <- qualified_hitters %>%
  group_by(playerID) %>%
  summarise(
    HR = sum(HR),
    PA = sum(PA),
    .groups = "drop"
  ) %>%
  arrange(desc(HR)) %>%
  slice_head(n = 10) %>%
  left_join(People, by = "playerID") %>%
  mutate(
    Name = paste(nameFirst, nameLast)
  ) %>%
  select(Name, PA, HR)


############################################################
## 7. Create the Visualization
############################################################

HRPlot <- ggplot(hr_leaders,
       aes(x = reorder(Name, HR), y = HR)) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(
    title = "Top 10 MLB Home Run Hitters",
    subtitle = "2025 Regular Season (Minimum 400 PA)",
    x = "Player",
    y = "Home Runs",
    caption = "Source: Lahman Baseball Database"
  ) +
  theme_minimal()

HRPlot

# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("nflreadr")) install.packages("nflreadr")
if (!require("plotly")) install.packages("plotly")

library(tidyverse)
library(nflreadr)
library(plotly)

# ----------------------------------------------------------
# Step 2: Set global options
# ----------------------------------------------------------

# Turn off scientific notation
options(scipen = 9999)

# ----------------------------------------------------------
# Step 3: Load NFL play-by-play data (2025 season)
# ----------------------------------------------------------

# Year can be adjusted as needed
data <- load_pbp(2025)

# ----------------------------------------------------------
# Step 4: Filter for run or pass plays with EPA
# ----------------------------------------------------------

pbp_rp <- data %>%
  filter((rush == 1 | pass == 1), !is.na(epa))

# ----------------------------------------------------------
# Step 5: Create trimmed data frame for analysis
# ----------------------------------------------------------

mydata <- pbp_rp %>% 
  select(
    posteam,
    pass,
    wp,
    qtr,
    down,
    half_seconds_remaining
  )

glimpse(mydata)

# ----------------------------------------------------------
# Step 6: Filter for early-down, first-half, 
# neutral-game-state plays
# ----------------------------------------------------------
# Research question:
# Which teams were the most pass-heavy in the first half on early downs
# with win probability between 20% and 80%, excluding the final
# 2 minutes of the half?

mydata_summary <- mydata %>%
  filter(
    wp > 0.20,
    wp < 0.80,
    down <= 2,
    qtr <= 2,
    half_seconds_remaining > 120
  ) %>%
  group_by(posteam) %>%
  summarize(
    mean_pass = mean(pass),
    plays = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_pass))

glimpse(mydata_summary)

# ----------------------------------------------------------
# Step 7: Order teams for plotting
# ----------------------------------------------------------

mydata_summary <- mydata_summary %>%
  mutate(
    posteam = factor(
      posteam,
      levels = posteam[order(mean_pass, decreasing = TRUE)]
    )
  )

# ----------------------------------------------------------
# Step 8: Create Plotly visualization
# ----------------------------------------------------------

graph <- plot_ly(
  data = mydata_summary,
  x = ~posteam,
  y = ~mean_pass,
  type = "scatter",
  mode = "text",
  text = ~posteam,
  textposition = "middle center",
  hovertemplate = paste(
    "<b>%{text}</b><br>",
    "Pass rate: %{y:.1%}<br>",
    "Plays: %{customdata}<extra></extra>"
  ),
  customdata = ~plays
) %>%
  layout(
    title = "Tendency to pass, by NFL team, 2024",
    xaxis = list(
      title = "Team",
      showticklabels = FALSE
    ),
    yaxis = list(
      title = "Percent pass plays",
      tickformat = ".0%"
    )
  )

graph

True Margin Map of 2024 Presidential Election in Middle Tennessee

This true margin map below shows who won the 2024 presidential election in which precincts of Middle Tennessee but also by what margin.

Code:

# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("sf")) install.packages("sf")
if (!require("leaflet")) install.packages("leaflet")
if (!require("leafpop")) install.packages("leafpop")

library(tidyverse)
library(sf)
library(leaflet)
library(leafpop)

# ----------------------------------------------------------
# Step 2: Load precinct-level vote data for Rutherford County
# ----------------------------------------------------------

VoteData <- read.csv(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Rutherford_Vote_Data.csv"
)

# ----------------------------------------------------------
# Step 3: Download and load Rutherford County precinct shapefile
# ----------------------------------------------------------

download.file(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Rutherford_Precincts.zip",
  "RutherfordPrecinctMap.zip",
  mode = "wb"
)

unzip("RutherfordPrecinctMap.zip")

MapInfo <- read_sf("Rutherford_Precincts.shp")
MapInfo <- MapInfo %>% 
  select(Precinct, geometry)

# ----------------------------------------------------------
# Step 4: Join Rutherford vote data to map and transform CRS
# ----------------------------------------------------------

DataAndMapRuCo <- left_join(
  VoteData,
  MapInfo,
  by = "Precinct") %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 5: Load precinct-level vote data for Davidson County
# ----------------------------------------------------------

VoteData <- read.csv(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Davidson_Vote_Data.csv"
)

# ----------------------------------------------------------
# Step 6: Download and load Davidson County precinct shapefile
# ----------------------------------------------------------

download.file(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Davidson_Precincts.zip",
  "DavidsonPrecinctMap.zip",
  mode = "wb"
)

unzip("DavidsonPrecinctMap.zip")

MapInfo <- read_sf("Davidson_Precincts.shp")
MapInfo <- MapInfo %>% 
  select(Precinct, geometry)

# ----------------------------------------------------------
# Step 7: Join Davidson vote data to map and transform CRS
# ----------------------------------------------------------

DataAndMapDavidson <- left_join(
  VoteData,
  MapInfo,
  by = "Precinct") %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 8: Combine Rutherford and Davidson precinct files
# ----------------------------------------------------------

DataAndMap <- DataAndMapRuCo %>%
  bind_rows(DataAndMapDavidson) %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 9: Compute totals, vote shares, winner, and TRUE margin
# ----------------------------------------------------------

MarginData <- DataAndMap %>%
  mutate(
    Total = Trump + Harris + Other,
    Pct_Trump  = (Trump  / Total) * 100,
    Pct_Harris = (Harris / Total) * 100
  ) %>%
  mutate(
    Winner = case_when(
      Trump > Harris ~ "Republican",
      Harris > Trump ~ "Democratic",
      TRUE           ~ "Tie"
    ),
    Margin_Pct = round(abs(Pct_Trump - Pct_Harris), 1)
  )

# ----------------------------------------------------------
# Step 10: Create party-aware color palettes
# ----------------------------------------------------------

dem_pal <- colorNumeric(
  palette = c("#deebf7", "#08519c"),
  domain  = MarginData$Margin_Pct
)

rep_pal <- colorNumeric(
  palette = c("#fee0d2", "#99000d"),
  domain  = MarginData$Margin_Pct
)

# ----------------------------------------------------------
# Step 11: Assign fill colors using dplyr
# ----------------------------------------------------------

MarginData <- MarginData %>%
  mutate(
    FillColor = case_when(
      Winner == "Democratic" ~ dem_pal(Margin_Pct),
      Winner == "Republican" ~ rep_pal(Margin_Pct),
      TRUE                   ~ "gray"
    )
  )

# ----------------------------------------------------------
# Step 12: Create popup table (with raw vote counts)
# ----------------------------------------------------------

PopupData <- MarginData %>%
  st_drop_geometry() %>%
  select(
    County,
    Precinct,
    Winner,
    Trump,
    Harris,
    Other,
    `Margin (percentage points)` = Margin_Pct
  )

popup_content <- popupTable(
  PopupData,
  feature.id = FALSE,
  row.numbers = FALSE
)

# ----------------------------------------------------------
# Step 13: TRUE margin choropleth map
# ----------------------------------------------------------

TrueMarginMap <- leaflet(MarginData) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor   = ~FillColor,
    fillOpacity = 0.7,
    color       = "black",
    weight      = 0.4,
    popup       = popup_content
  ) %>%
  addLegend(
    colors = c("#08519c", "#99000d"),
    labels = c("Democratic winner", "Republican winner"),
    title  = "Winner (by margin)",
    opacity = 1
  )

TrueMarginMap

Sports and Election Analyses

Ava Houghton

2026-04-18