The graph below shows the percentage of pass plays thrown by each NFL team in the 2025 season. The Arizona Cardinals threw the most, with 60.1% of their plays being passes, and the New York Jets threw the least, with 40.6% of their plays being passes.
# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("nflreadr")) install.packages("nflreadr")
if (!require("plotly")) install.packages("plotly")
library(tidyverse)
library(nflreadr)
library(plotly)
# ----------------------------------------------------------
# Step 2: Set global options
# ----------------------------------------------------------
# Turn off scientific notation
options(scipen = 9999)
# ----------------------------------------------------------
# Step 3: Load NFL play-by-play data (2025 season)
# ----------------------------------------------------------
# Year can be adjusted as needed
data <- load_pbp(2025)
# ----------------------------------------------------------
# Step 4: Filter for run or pass plays with EPA
# ----------------------------------------------------------
pbp_rp <- data %>%
filter((rush == 1 | pass == 1), !is.na(epa))
# ----------------------------------------------------------
# Step 5: Create trimmed data frame for analysis
# ----------------------------------------------------------
mydata <- pbp_rp %>%
select(
posteam,
pass,
wp,
qtr,
down,
half_seconds_remaining
)
glimpse(mydata)
# ----------------------------------------------------------
# Step 6: Filter for early-down, first-half,
# neutral-game-state plays
# ----------------------------------------------------------
# Research question:
# Which teams were the most pass-heavy in the first half on early downs
# with win probability between 20% and 80%, excluding the final
# 2 minutes of the half?
mydata_summary <- mydata %>%
filter(
wp > 0.20,
wp < 0.80,
down <= 2,
qtr <= 2,
half_seconds_remaining > 120
) %>%
group_by(posteam) %>%
summarize(
mean_pass = mean(pass),
plays = n(),
.groups = "drop"
) %>%
arrange(desc(mean_pass))
glimpse(mydata_summary)
# ----------------------------------------------------------
# Step 7: Order teams for plotting
# ----------------------------------------------------------
mydata_summary <- mydata_summary %>%
mutate(
posteam = factor(
posteam,
levels = posteam[order(mean_pass, decreasing = TRUE)]
)
)
# ----------------------------------------------------------
# Step 8: Create Plotly visualization
# ----------------------------------------------------------
graph <- plot_ly(
data = mydata_summary,
x = ~posteam,
y = ~mean_pass,
type = "scatter",
mode = "text",
text = ~posteam,
textposition = "middle center",
hovertemplate = paste(
"<b>%{text}</b><br>",
"Pass rate: %{y:.1%}<br>",
"Plays: %{customdata}<extra></extra>"
),
customdata = ~plays
) %>%
layout(
title = "Tendency to pass, by NFL team, 2025",
xaxis = list(
title = "Team",
showticklabels = FALSE
),
yaxis = list(
title = "Percent pass plays",
tickformat = ".0%"
)
)
graph
This graph shows the top 10 home run hitters in the MLB during the 2025 season. Cal Raleigh leads the graph with 60 home runs.
############################################################
## 1. Check for Required Packages and Install if Missing
############################################################
# Check for Lahman (historical MLB data)
if (!requireNamespace("Lahman", quietly = TRUE)) {
install.packages("Lahman")
}
# Check for tidyverse
if (!requireNamespace("tidyverse", quietly = TRUE)) {
install.packages("tidyverse")
}
############################################################
## 2. Load Required Packages
############################################################
library(Lahman)
library(tidyverse)
############################################################
## 3. Define the Season of Interest
############################################################
# Most recently completed MLB season
season_year <- 2025
############################################################
## 4. Pull Season-Level Batting Data (Lahman)
############################################################
# The Batting table contains one row per player per season per team
batting_data <- Batting %>%
filter(yearID == season_year)
############################################################
## 5. Inspect and Filter the Data
############################################################
# Calculate plate appearances (AB + BB + HBP + SF)
qualified_hitters <- batting_data %>%
mutate(
PA = AB + BB + HBP + SF
) %>%
filter(PA >= 400)
############################################################
## 6. Summarize: Identify the Top Home Run Hitters
############################################################
# Players may appear multiple times if traded; combine them
hr_leaders <- qualified_hitters %>%
group_by(playerID) %>%
summarise(
HR = sum(HR),
PA = sum(PA),
.groups = "drop"
) %>%
arrange(desc(HR)) %>%
slice_head(n = 10) %>%
left_join(People, by = "playerID") %>%
mutate(
Name = paste(nameFirst, nameLast)
) %>%
select(Name, PA, HR)
############################################################
## 7. Create the Visualization
############################################################
HRPlot <- ggplot(hr_leaders,
aes(x = reorder(Name, HR), y = HR)) +
geom_col(fill = "firebrick") +
coord_flip() +
labs(
title = "Top 10 MLB Home Run Hitters",
subtitle = "2025 Regular Season (Minimum 400 PA)",
x = "Player",
y = "Home Runs",
caption = "Source: Lahman Baseball Database"
) +
theme_minimal()
HRPlot
############################################################
## 8. Optional Extension Ideas (Not Executed)
############################################################
#
# - Compare HR with RBI or runs scored
# - Summarize by teamID
# - Compute HR per PA (rate stat)
# - Compare two seasons side by side
#
############################################################
This map shows the results of the 2024 presidential election from voting precincts within Davidson County and Rutherford County. Blue represents precincts that voted primarily for Democrat candidate Kamala Harris, and red represents precincts that voted primarily for Republican candidate Donald Trump.
# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("sf")) install.packages("sf")
if (!require("leaflet")) install.packages("leaflet")
if (!require("leafpop")) install.packages("leafpop")
library(tidyverse)
library(sf)
library(leaflet)
library(leafpop)
# ----------------------------------------------------------
# Step 2: Load precinct-level vote data for Rutherford County
# ----------------------------------------------------------
VoteData <- read.csv(
"https://github.com/drkblake/Data/raw/refs/heads/main/Rutherford_Vote_Data.csv"
)
# ----------------------------------------------------------
# Step 3: Download and load Rutherford County precinct shapefile
# ----------------------------------------------------------
download.file(
"https://github.com/drkblake/Data/raw/refs/heads/main/Rutherford_Precincts.zip",
"RutherfordPrecinctMap.zip",
mode = "wb"
)
unzip("RutherfordPrecinctMap.zip")
MapInfo <- read_sf("Rutherford_Precincts.shp")
MapInfo <- MapInfo %>%
select(Precinct, geometry)
# ----------------------------------------------------------
# Step 4: Join Rutherford vote data to map and transform CRS
# ----------------------------------------------------------
DataAndMapRuCo <- left_join(
VoteData,
MapInfo,
by = "Precinct") %>%
st_as_sf() %>%
st_transform(4326)
# ----------------------------------------------------------
# Step 5: Load precinct-level vote data for Davidson County
# ----------------------------------------------------------
VoteData <- read.csv(
"https://github.com/drkblake/Data/raw/refs/heads/main/Davidson_Vote_Data.csv"
)
# ----------------------------------------------------------
# Step 6: Download and load Davidson County precinct shapefile
# ----------------------------------------------------------
download.file(
"https://github.com/drkblake/Data/raw/refs/heads/main/Davidson_Precincts.zip",
"DavidsonPrecinctMap.zip",
mode = "wb"
)
unzip("DavidsonPrecinctMap.zip")
MapInfo <- read_sf("Davidson_Precincts.shp")
MapInfo <- MapInfo %>%
select(Precinct, geometry)
# ----------------------------------------------------------
# Step 7: Join Davidson vote data to map and transform CRS
# ----------------------------------------------------------
DataAndMapDavidson <- left_join(
VoteData,
MapInfo,
by = "Precinct") %>%
st_as_sf() %>%
st_transform(4326)
# ----------------------------------------------------------
# Step 8: Combine Rutherford and Davidson precinct files
# ----------------------------------------------------------
DataAndMap <- DataAndMapRuCo %>%
bind_rows(DataAndMapDavidson) %>%
st_as_sf() %>%
st_transform(4326)
# ----------------------------------------------------------
# Step 9: Compute totals, vote shares, winner, and TRUE margin
# ----------------------------------------------------------
MarginData <- DataAndMap %>%
mutate(
Total = Trump + Harris + Other,
Pct_Trump = (Trump / Total) * 100,
Pct_Harris = (Harris / Total) * 100
) %>%
mutate(
Winner = case_when(
Trump > Harris ~ "Republican",
Harris > Trump ~ "Democratic",
TRUE ~ "Tie"
),
Margin_Pct = round(abs(Pct_Trump - Pct_Harris), 1)
)
# ----------------------------------------------------------
# Step 10: Create party-aware color palettes
# ----------------------------------------------------------
dem_pal <- colorNumeric(
palette = c("#deebf7", "#08519c"),
domain = MarginData$Margin_Pct
)
rep_pal <- colorNumeric(
palette = c("#fee0d2", "#99000d"),
domain = MarginData$Margin_Pct
)
# ----------------------------------------------------------
# Step 11: Assign fill colors using dplyr
# ----------------------------------------------------------
MarginData <- MarginData %>%
mutate(
FillColor = case_when(
Winner == "Democratic" ~ dem_pal(Margin_Pct),
Winner == "Republican" ~ rep_pal(Margin_Pct),
TRUE ~ "gray"
)
)
# ----------------------------------------------------------
# Step 12: Create popup table (with raw vote counts)
# ----------------------------------------------------------
PopupData <- MarginData %>%
st_drop_geometry() %>%
select(
County,
Precinct,
Winner,
Trump,
Harris,
Other,
`Margin (percentage points)` = Margin_Pct
)
popup_content <- popupTable(
PopupData,
feature.id = FALSE,
row.numbers = FALSE
)
# ----------------------------------------------------------
# Step 13: TRUE margin choropleth map
# ----------------------------------------------------------
TrueMarginMap <- leaflet(MarginData) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(
fillColor = ~FillColor,
fillOpacity = 0.7,
color = "black",
weight = 0.4,
popup = popup_content
) %>%
addLegend(
colors = c("#08519c", "#99000d"),
labels = c("Democratic winner", "Republican winner"),
title = "Winner (by margin)",
opacity = 1
)
TrueMarginMap