1. The first graph is a representation of each NFL teams tendency to run/pass the football. The teams who are top left are the teams who pass the ball more compared to the teams in the bottom right who run the ball more.
  2. The second graph shows the batters, from the 2025 season, who hit the most home runs.
  3. The last graph is a map that shows the winner of each precinct in Davidson and Rutherford County during the 2024 Election.




Code:

# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("nflreadr")) install.packages("nflreadr")
if (!require("plotly")) install.packages("plotly")

library(tidyverse)
library(nflreadr)
library(plotly)

# ----------------------------------------------------------
# Step 2: Set global options
# ----------------------------------------------------------

# Turn off scientific notation
options(scipen = 9999)

# ----------------------------------------------------------
# Step 3: Load NFL play-by-play data (2025 season)
# ----------------------------------------------------------

# Year can be adjusted as needed
data <- load_pbp(2025)

# ----------------------------------------------------------
# Step 4: Filter for run or pass plays with EPA
# ----------------------------------------------------------

pbp_rp <- data %>%
  filter((rush == 1 | pass == 1), !is.na(epa))

# ----------------------------------------------------------
# Step 5: Create trimmed data frame for analysis
# ----------------------------------------------------------

mydata <- pbp_rp %>% 
  select(
    posteam,
    pass,
    wp,
    qtr,
    down,
    half_seconds_remaining
  )

glimpse(mydata)

# ----------------------------------------------------------
# Step 6: Filter for early-down, first-half, 
# neutral-game-state plays
# ----------------------------------------------------------
# Research question:
# Which teams were the most pass-heavy in the first half on early downs
# with win probability between 20% and 80%, excluding the final
# 2 minutes of the half?

mydata_summary <- mydata %>%
  filter(
    wp > 0.20,
    wp < 0.80,
    down <= 2,
    qtr <= 2,
    half_seconds_remaining > 120
  ) %>%
  group_by(posteam) %>%
  summarize(
    mean_pass = mean(pass),
    plays = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_pass))

glimpse(mydata_summary)

# ----------------------------------------------------------
# Step 7: Order teams for plotting
# ----------------------------------------------------------

mydata_summary <- mydata_summary %>%
  mutate(
    posteam = factor(
      posteam,
      levels = posteam[order(mean_pass, decreasing = TRUE)]
    )
  )

# ----------------------------------------------------------
# Step 8: Create Plotly visualization
# ----------------------------------------------------------

graph <- plot_ly(
  data = mydata_summary,
  x = ~posteam,
  y = ~mean_pass,
  type = "scatter",
  mode = "text",
  text = ~posteam,
  textposition = "middle center",
  hovertemplate = paste(
    "<b>%{text}</b><br>",
    "Pass rate: %{y:.1%}<br>",
    "Plays: %{customdata}<extra></extra>"
  ),
  customdata = ~plays
) %>%
  layout(
    title = "Tendency to pass, by NFL team, 2025",
    xaxis = list(
      title = "Team",
      showticklabels = FALSE
    ),
    yaxis = list(
      title = "Percent pass plays",
      tickformat = ".0%"
    )
  )

graph
############################################################
## 1. Check for Required Packages and Install if Missing
############################################################

# Check for Lahman (historical MLB data)
if (!requireNamespace("Lahman", quietly = TRUE)) {
  install.packages("Lahman")
}

# Check for tidyverse
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}


############################################################
## 2. Load Required Packages
############################################################

library(Lahman)
library(tidyverse)


############################################################
## 3. Define the Season of Interest
############################################################

# Most recently completed MLB season
season_year <- 2025


############################################################
## 4. Pull Season-Level Batting Data (Lahman)
############################################################

# The Batting table contains one row per player per season per team
batting_data <- Batting %>%
  filter(yearID == season_year)


############################################################
## 5. Inspect and Filter the Data
############################################################

# Calculate plate appearances (AB + BB + HBP + SF)
qualified_hitters <- batting_data %>%
  mutate(
    PA = AB + BB + HBP + SF
  ) %>%
  filter(PA >= 400)


############################################################
## 6. Summarize: Identify the Top Home Run Hitters
############################################################

# Players may appear multiple times if traded; combine them
hr_leaders <- qualified_hitters %>%
  group_by(playerID) %>%
  summarise(
    HR = sum(HR),
    PA = sum(PA),
    .groups = "drop"
  ) %>%
  arrange(desc(HR)) %>%
  slice_head(n = 10) %>%
  left_join(People, by = "playerID") %>%
  mutate(
    Name = paste(nameFirst, nameLast)
  ) %>%
  select(Name, PA, HR)


############################################################
## 7. Create the Visualization
############################################################

HRPlot <- ggplot(hr_leaders,
                 aes(x = reorder(Name, HR), y = HR)) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(
    title = "Top 10 MLB Home Run Hitters",
    subtitle = "2025 Regular Season (Minimum 400 PA)",
    x = "Player",
    y = "Home Runs",
    caption = "Source: Lahman Baseball Database"
  ) +
  theme_minimal()

HRPlot

############################################################
## 8. Optional Extension Ideas (Not Executed)
############################################################
#
# - Compare HR with RBI or runs scored
# - Summarize by teamID
# - Compute HR per PA (rate stat)
# - Compare two seasons side by side
#
############################################################
{r,include=FALSE}

# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("sf")) install.packages("sf")
if (!require("leaflet")) install.packages("leaflet")
if (!require("leafpop")) install.packages("leafpop")

library(tidyverse)
library(sf)
library(leaflet)
library(leafpop)

# ----------------------------------------------------------
# Step 2: Load precinct-level vote data for Rutherford County
# ----------------------------------------------------------

VoteData <- read.csv(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Rutherford_Vote_Data.csv"
)

# ----------------------------------------------------------
# Step 3: Download and load Rutherford County precinct shapefile
# ----------------------------------------------------------

download.file(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Rutherford_Precincts.zip",
  "RutherfordPrecinctMap.zip",
  mode = "wb"
)

unzip("RutherfordPrecinctMap.zip")

MapInfo <- read_sf("Rutherford_Precincts.shp")
MapInfo <- MapInfo %>% 
  select(Precinct, geometry)

# ----------------------------------------------------------
# Step 4: Join Rutherford vote data to map and transform CRS
# ----------------------------------------------------------

DataAndMapRuCo <- left_join(
  VoteData,
  MapInfo,
  by = "Precinct") %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 5: Load precinct-level vote data for Davidson County
# ----------------------------------------------------------

VoteData <- read.csv(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Davidson_Vote_Data.csv"
)

# ----------------------------------------------------------
# Step 6: Download and load Davidson County precinct shapefile
# ----------------------------------------------------------

download.file(
  "https://github.com/drkblake/Data/raw/refs/heads/main/Davidson_Precincts.zip",
  "DavidsonPrecinctMap.zip",
  mode = "wb"
)

unzip("DavidsonPrecinctMap.zip")

MapInfo <- read_sf("Davidson_Precincts.shp")
MapInfo <- MapInfo %>% 
  select(Precinct, geometry)

# ----------------------------------------------------------
# Step 7: Join Davidson vote data to map and transform CRS
# ----------------------------------------------------------

DataAndMapDavidson <- left_join(
  VoteData,
  MapInfo,
  by = "Precinct") %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 8: Combine Rutherford and Davidson precinct files
# ----------------------------------------------------------

DataAndMap <- DataAndMapRuCo %>%
  bind_rows(DataAndMapDavidson) %>%
  st_as_sf() %>%
  st_transform(4326)

# ----------------------------------------------------------
# Step 9: Compute totals, vote shares, winner, and TRUE margin
# ----------------------------------------------------------

MarginData <- DataAndMap %>%
  mutate(
    Total = Trump + Harris + Other,
    Pct_Trump  = (Trump  / Total) * 100,
    Pct_Harris = (Harris / Total) * 100
  ) %>%
  mutate(
    Winner = case_when(
      Trump > Harris ~ "Republican",
      Harris > Trump ~ "Democratic",
      TRUE           ~ "Tie"
    ),
    Margin_Pct = round(abs(Pct_Trump - Pct_Harris), 1)
  )

# ----------------------------------------------------------
# Step 10: Create party-aware color palettes
# ----------------------------------------------------------

dem_pal <- colorNumeric(
  palette = c("#deebf7", "#08519c"),
  domain  = MarginData$Margin_Pct
)

rep_pal <- colorNumeric(
  palette = c("#fee0d2", "#99000d"),
  domain  = MarginData$Margin_Pct
)

# ----------------------------------------------------------
# Step 11: Assign fill colors using dplyr
# ----------------------------------------------------------

MarginData <- MarginData %>%
  mutate(
    FillColor = case_when(
      Winner == "Democratic" ~ dem_pal(Margin_Pct),
      Winner == "Republican" ~ rep_pal(Margin_Pct),
      TRUE                   ~ "gray"
    )
  )

# ----------------------------------------------------------
# Step 12: Create popup table (with raw vote counts)
# ----------------------------------------------------------

PopupData <- MarginData %>%
  st_drop_geometry() %>%
  select(
    County,
    Precinct,
    Winner,
    Trump,
    Harris,
    Other,
    `Margin (percentage points)` = Margin_Pct
  )

popup_content <- popupTable(
  PopupData,
  feature.id = FALSE,
  row.numbers = FALSE
)

# ----------------------------------------------------------
# Step 13: TRUE margin choropleth map
# ----------------------------------------------------------

TrueMarginMap <- leaflet(MarginData) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor   = ~FillColor,
    fillOpacity = 0.7,
    color       = "black",
    weight      = 0.4,
    popup       = popup_content
  ) %>%
  addLegend(
    colors = c("#08519c", "#99000d"),
    labels = c("Democratic winner", "Republican winner"),
    title  = "Winner (by margin)",
    opacity = 1
  )

TrueMarginMap