Monday and Wednesday Visualizations

Tendency to Pass

This chart shows which NFL teams were most likely to call pass plays in the first half on early downs in neutral game situations. Teams near the top passed more often, while teams near the bottom leaned more heavily on the run.

Code

# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("nflreadr")) install.packages("nflreadr")
if (!require("plotly")) install.packages("plotly")

library(tidyverse)
library(nflreadr)
library(plotly)
# ----------------------------------------------------------
# Step 2: Set global options
# ----------------------------------------------------------

# Turn off scientific notation
options(scipen = 9999)
# ----------------------------------------------------------
# Step 3: Load NFL play-by-play data (2025 season)
# ----------------------------------------------------------

# Year can be adjusted as needed
data <- load_pbp(2025)
# ----------------------------------------------------------
# Step 4: Filter for run or pass plays with EPA
# ----------------------------------------------------------

pbp_rp <- data %>%
  filter((rush == 1 | pass == 1), !is.na(epa))
# ----------------------------------------------------------
# Step 5: Create trimmed data frame for analysis
# ----------------------------------------------------------

mydata <- pbp_rp %>% 
  select(
    posteam,
    pass,
    wp,
    qtr,
    down,
    half_seconds_remaining
  )

# ----------------------------------------------------------
# Step 6: Filter for early-down, first-half, 
# neutral-game-state plays
# ----------------------------------------------------------
# Research question:
# Which teams were the most pass-heavy in the first half on early downs
# with win probability between 20% and 80%, excluding the final
# 2 minutes of the half?

mydata_summary <- mydata %>%
  filter(
    wp > 0.20,
    wp < 0.80,
    down <= 2,
    qtr <= 2,
    half_seconds_remaining > 120
  ) %>%
  group_by(posteam) %>%
  summarize(
    mean_pass = mean(pass),
    plays = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_pass))

# ----------------------------------------------------------
# Step 7: Order teams for plotting
# ----------------------------------------------------------

mydata_summary <- mydata_summary %>%
  mutate(
    posteam = factor(
      posteam,
      levels = posteam[order(mean_pass, decreasing = TRUE)]
    )
  )
# ----------------------------------------------------------
# Step 8: Create Plotly visualization
# ----------------------------------------------------------

graph <- plot_ly(
  data = mydata_summary,
  x = ~posteam,
  y = ~mean_pass,
  type = "scatter",
  mode = "text",
  text = ~posteam,
  textposition = "middle center",
  hovertemplate = paste(
    "<b>%{text}</b><br>",
    "Pass rate: %{y:.1%}<br>",
    "Plays: %{customdata}<extra></extra>"
  ),
  customdata = ~plays
) %>%
  layout(
    title = "Tendency to pass, by NFL team, 2025",
    xaxis = list(
      title = "Team",
      showticklabels = FALSE
    ),
    yaxis = list(
      title = "Percent pass plays",
      tickformat = ".0%"
    )
  )

graph

Top 10 MLB Home Run Hitters

This graphic displays the top 10 MLB home run hitters from the selected season among players with at least 400 plate appearances. It makes it easy to compare which qualified hitters produced the most power during the year.

Code

############################################################
## 1. Check for Required Packages and Install if Missing
############################################################

# Check for Lahman (historical MLB data)
if (!requireNamespace("Lahman", quietly = TRUE)) {
  install.packages("Lahman")
}

# Check for tidyverse
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}


############################################################
## 2. Load Required Packages
############################################################

library(Lahman)
library(tidyverse)


############################################################
## 3. Define the Season of Interest
############################################################

# Most recently completed MLB season
season_year <- 2025


############################################################
## 4. Pull Season-Level Batting Data (Lahman)
############################################################

# The Batting table contains one row per player per season per team
batting_data <- Batting %>%
  filter(yearID == season_year)


############################################################
## 5. Inspect and Filter the Data
############################################################

# Calculate plate appearances (AB + BB + HBP + SF)
qualified_hitters <- batting_data %>%
  mutate(
    PA = AB + BB + HBP + SF
  ) %>%
  filter(PA >= 400)


############################################################
## 6. Summarize: Identify the Top Home Run Hitters
############################################################

# Players may appear multiple times if traded; combine them
hr_leaders <- qualified_hitters %>%
  group_by(playerID) %>%
  summarise(
    HR = sum(HR),
    PA = sum(PA),
    .groups = "drop"
  ) %>%
  arrange(desc(HR)) %>%
  slice_head(n = 10) %>%
  left_join(People, by = "playerID") %>%
  mutate(
    Name = paste(nameFirst, nameLast)
  ) %>%
  select(Name, PA, HR)


############################################################
## 7. Create the Visualization
############################################################

HRPlot <- ggplot(hr_leaders,
       aes(x = reorder(Name, HR), y = HR)) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(
    title = "Top 10 MLB Home Run Hitters",
    subtitle = "2025 Regular Season (Minimum 400 PA)",
    x = "Player",
    y = "Home Runs",
    caption = "Source: Lahman Baseball Database"
  ) +
  theme_minimal()

HRPlot

############################################################
## 8. Optional Extension Ideas (Not Executed)
############################################################
#
# - Compare HR with RBI or runs scored
# - Summarize by teamID
# - Compute HR per PA (rate stat)
# - Compare two seasons side by side
#
############################################################

True Margin Map

This map shows the true winning margin in precincts across Rutherford and Davidson counties. Darker shades indicate a larger margin of victory, while the color also shows whether the precinct was won by the Democratic or Republican candidate.

Code

# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("sf")) install.packages("sf")
if (!require("leaflet")) install.packages("leaflet")
if (!require("leafpop")) install.packages("leafpop")

library(tidyverse)
library(sf)
library(leaflet)
library(leafpop)

# ----------------------------------------------------------
# Step 2: Load precinct-level vote data for Rutherford County
# ----------------------------------------------------------

VoteData <- read.csv(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Rutherford_Vote_Data.csv"
)

# ----------------------------------------------------------
# Step 3: Download and load Rutherford County precinct shapefile
# ----------------------------------------------------------

download.file(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Rutherford_Precincts.zip",
  "RutherfordPrecinctMap.zip",
  mode = "wb"
)

unzip("RutherfordPrecinctMap.zip")

MapInfo <- read_sf("Rutherford_Precincts.shp")
MapInfo <- MapInfo %>% 
  select(Precinct, geometry)

# ----------------------------------------------------------
# Step 4: Join Rutherford vote data to map and transform CRS
# ----------------------------------------------------------

DataAndMapRuCo <- left_join(
  VoteData,
  MapInfo,
  by = "Precinct") %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 5: Load precinct-level vote data for Davidson County
# ----------------------------------------------------------

VoteData <- read.csv(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Davidson_Vote_Data.csv"
)

# ----------------------------------------------------------
# Step 6: Download and load Davidson County precinct shapefile
# ----------------------------------------------------------

download.file(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Davidson_Precincts.zip",
  "DavidsonPrecinctMap.zip",
  mode = "wb"
)

unzip("DavidsonPrecinctMap.zip")

MapInfo <- read_sf("Davidson_Precincts.shp")
MapInfo <- MapInfo %>% 
  select(Precinct, geometry)

# ----------------------------------------------------------
# Step 7: Join Davidson vote data to map and transform CRS
# ----------------------------------------------------------

DataAndMapDavidson <- left_join(
  VoteData,
  MapInfo,
  by = "Precinct") %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 8: Combine Rutherford and Davidson precinct files
# ----------------------------------------------------------

DataAndMap <- DataAndMapRuCo %>%
  bind_rows(DataAndMapDavidson) %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 9: Compute totals, vote shares, winner, and TRUE margin
# ----------------------------------------------------------

MarginData <- DataAndMap %>%
  mutate(
    Total = Trump + Harris + Other,
    Pct_Trump  = (Trump  / Total) * 100,
    Pct_Harris = (Harris / Total) * 100
  ) %>%
  mutate(
    Winner = case_when(
      Trump > Harris ~ "Republican",
      Harris > Trump ~ "Democratic",
      TRUE           ~ "Tie"
    ),
    Margin_Pct = round(abs(Pct_Trump - Pct_Harris), 1)
  )

# ----------------------------------------------------------
# Step 10: Create party-aware color palettes
# ----------------------------------------------------------

dem_pal <- colorNumeric(
  palette = c("#deebf7", "#08519c"),
  domain  = MarginData$Margin_Pct
)

rep_pal <- colorNumeric(
  palette = c("#fee0d2", "#99000d"),
  domain  = MarginData$Margin_Pct
)

# ----------------------------------------------------------
# Step 11: Assign fill colors using dplyr
# ----------------------------------------------------------

MarginData <- MarginData %>%
  mutate(
    FillColor = case_when(
      Winner == "Democratic" ~ dem_pal(Margin_Pct),
      Winner == "Republican" ~ rep_pal(Margin_Pct),
      TRUE                   ~ "gray"
    )
  )

# ----------------------------------------------------------
# Step 12: Create popup table (with raw vote counts)
# ----------------------------------------------------------

PopupData <- MarginData %>%
  st_drop_geometry() %>%
  select(
    County,
    Precinct,
    Winner,
    Trump,
    Harris,
    Other,
    `Margin (percentage points)` = Margin_Pct
  )

popup_content <- popupTable(
  PopupData,
  feature.id = FALSE,
  row.numbers = FALSE
)

# ----------------------------------------------------------
# Step 13: TRUE margin choropleth map
# ----------------------------------------------------------

TrueMarginMap <- leaflet(MarginData) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor   = ~FillColor,
    fillOpacity = 0.7,
    color       = "black",
    weight      = 0.4,
    popup       = popup_content
  ) %>%
  addLegend(
    colors = c("#08519c", "#99000d"),
    labels = c("Democratic winner", "Republican winner"),
    title  = "Winner (by margin)",
    opacity = 1
  )

TrueMarginMap

Monday and Wednesday Visualizations

Mia Ruiz

2026-04-17

Tendency to Pass

Code

Top 10 MLB Home Run Hitters

Code

True Margin Map

Code