NBA Scorigami Analysis

Inspired by the NFL Scorigami concept, this visualization shows every final score combination that has occurred in NBA history, with the goal of identifying scores that have never happened.

Original idea

# Run this once to install required packages
install.packages(c("plotly", "ggplot2", "dplyr", "viridis", "MASS"))
library(ggplot2)
library(plotly)
library(dplyr)
library(viridis)
library(MASS)

Load and Prepare Data

all_games <- read.csv("games.csv")

# Determine winner and loser scores for each game
all_games <- all_games %>%
  mutate(
    winner_score = pmax(homeScore, awayScore),
    loser_score = pmin(homeScore, awayScore),
    game_type_label = ifelse(grepl("Playoff", gameType, ignore.case = TRUE), "Playoff", "Regular Season")
  )

# Check data
head(all_games)
##     gameId            gameDate  hometeamCity hometeamName hometeamId
## 1 42400407 2025-06-22 20:00:00 Oklahoma City      Thunder 1610612760
## 2 42400406 2025-06-19 20:30:00       Indiana       Pacers 1610612754
## 3 42400405 2025-06-16 20:30:00 Oklahoma City      Thunder 1610612760
## 4 42400404 2025-06-13 20:30:00       Indiana       Pacers 1610612754
## 5 42400403 2025-06-11 20:30:00       Indiana       Pacers 1610612754
## 6 42400402 2025-06-08 20:00:00 Oklahoma City      Thunder 1610612760
##    awayteamCity awayteamName awayteamId homeScore awayScore     winner gameType
## 1       Indiana       Pacers 1610612754       103        91 1610612760 Playoffs
## 2 Oklahoma City      Thunder 1610612760       108        91 1610612754 Playoffs
## 3       Indiana       Pacers 1610612754       120       109 1610612760 Playoffs
## 4 Oklahoma City      Thunder 1610612760       104       111 1610612760 Playoffs
## 5 Oklahoma City      Thunder 1610612760       116       107 1610612754 Playoffs
## 6       Indiana       Pacers 1610612754       123       107 1610612760 Playoffs
##   attendance arenaId  gameLabel gameSubLabel seriesGameNumber winner_score
## 1      18203 1000052 NBA Finals       Game 7                7          103
## 2      17274 1000063 NBA Finals       Game 6                6          108
## 3      18203 1000052 NBA Finals       Game 5                5          120
## 4      17274 1000063 NBA Finals       Game 4                4          111
## 5      17274 1000063 NBA Finals       Game 3                3          116
## 6      18203 1000052 NBA Finals       Game 2                2          123
##   loser_score game_type_label
## 1          91         Playoff
## 2          91         Playoff
## 3         109         Playoff
## 4         104         Playoff
## 5         107         Playoff
## 6         107         Playoff
summary(all_games[, c("winner_score", "loser_score", "gameType")])
##   winner_score    loser_score       gameType        
##  Min.   :  0.0   Min.   :  0.00   Length:71879      
##  1st Qu.:101.0   1st Qu.: 90.00   Class :character  
##  Median :109.0   Median : 98.00   Mode  :character  
##  Mean   :109.5   Mean   : 98.57                     
##  3rd Qu.:118.0   3rd Qu.:107.00                     
##  Max.   :186.0   Max.   :184.00
table(all_games$game_type_label)
## 
##        Playoff Regular Season 
##           4441          67438

Create Score Frequency Table

# Create a comprehensive grid of all possible scores
min_score <- 19
max_score <- 186

# Count occurrences of each score combination
score_combinations <- all_games %>%
  group_by(winner_score, loser_score) %>%
  summarise(
    total_games = n(),
    regular_season_games = sum(game_type_label == "Regular Season"),
    playoff_games = sum(game_type_label == "Playoff"),
    .groups = "drop"
  )

# Create full grid including scores that have never happened
all_possible_scores <- expand.grid(
  winner_score = min_score:max_score,
  loser_score = min_score:max_score
) %>%
  filter(winner_score > loser_score)  # Winner must have more points

# Merge with actual data
score_tally <- all_possible_scores %>%
  left_join(score_combinations, by = c("winner_score", "loser_score")) %>%
  mutate(
    total_games = ifelse(is.na(total_games), 0, total_games),
    regular_season_games = ifelse(is.na(regular_season_games), 0, regular_season_games),
    playoff_games = ifelse(is.na(playoff_games), 0, playoff_games),
    has_occurred = total_games > 0,
    margin = winner_score - loser_score
  )

# Summary statistics
cat(sprintf("Total possible score combinations: %d\n", nrow(score_tally)))
## Total possible score combinations: 14028
cat(sprintf("Scores that have occurred: %d\n", sum(score_tally$has_occurred)))
## Scores that have occurred: 3013
cat(sprintf("Scores that have NOT occurred: %d\n", sum(!score_tally$has_occurred)))
## Scores that have NOT occurred: 11015
cat(sprintf("Percentage of possible scores achieved: %.1f%%\n", 
            100 * sum(score_tally$has_occurred) / nrow(score_tally)))
## Percentage of possible scores achieved: 21.5%

2D Heatmap Preview

# Create complete grid with all three categories
full_grid <- expand.grid(
  loser_score = min_score:max_score,
  winner_score = min_score:max_score
) %>%
  left_join(score_tally, by = c("loser_score", "winner_score")) %>%
  mutate(
    # Categorize each cell
    category = case_when(
      loser_score >= winner_score ~ "impossible",
      is.na(has_occurred) | !has_occurred ~ "never_happened",
      TRUE ~ "occurred"
    ),
    # Create display value: use actual frequency for occurred, -1 for never happened, -2 for impossible
    display_value = case_when(
      category == "occurred" ~ total_games,
      category == "never_happened" ~ -1,
      category == "impossible" ~ -2
    ),
    # Create hover text
    hover_text = case_when(
      category == "occurred" ~ sprintf(
        "Winner: %d | Loser: %d<br>Total Games: %d<br>Regular Season: %d | Playoff: %d<br>Margin: %d",
        winner_score, loser_score, total_games, regular_season_games, playoff_games, margin
      ),
      category == "never_happened" ~ sprintf(
        "Winner: %d | Loser: %d<br><b>NEVER HAPPENED</b><br>Margin: %d",
        winner_score, loser_score, winner_score - loser_score
      ),
      category == "impossible" ~ sprintf(
        "Winner: %d | Loser: %d<br><b>IMPOSIBLE SCORE</b>",
        winner_score, loser_score
      )
    ),
    # Replace NA values
    total_games = ifelse(is.na(total_games), 0, total_games),
    regular_season_games = ifelse(is.na(regular_season_games), 0, regular_season_games),
    playoff_games = ifelse(is.na(playoff_games), 0, playoff_games)
  )

# Get max value for color scale
max_games <- max(full_grid$total_games[full_grid$category == "occurred"])

# Create custom colorscale: black for -2, white for -1, viridis for 0+
custom_colorscale <- list(
  c(0, "black"),           # -2 (impossible)
  c(0.00001, "black"),     
  c(0.00002, "white"),     # -1 (never happened)
  c(0.49999, "white"),
  c(0.5, "#440154"),       # Start of viridis (0 games)
  c(0.625, "#31688e"),
  c(0.75, "#35b779"),
  c(0.875, "#fde724"),
  c(1, "#fde724")          # Max games
)

# Normalize display values to 0-1 range for color mapping
full_grid <- full_grid %>%
  mutate(
    normalized_value = case_when(
      display_value == -2 ~ 0,
      display_value == -1 ~ 0.25,
      TRUE ~ 0.5 + 0.5 * (display_value / max_games)
    )
  )

heatmap_fig <- plot_ly(
  data = full_grid,
  x = ~loser_score,
  y = ~winner_score,
  z = ~normalized_value,
  type = "heatmap",
  colorscale = custom_colorscale,
  text = ~hover_text,
  hoverinfo = "text",
  showscale = FALSE
) %>%
  layout(
    title = "NBA Score Frequency Heatmap (white = never happened, black = impossible)",
    xaxis = list(title = "Losing Team's Score"),
    yaxis = list(title = "Winning Team's Score")
  )

# Add a separate color bar for the actual game frequencies
heatmap_fig <- heatmap_fig %>%
  add_trace(
    data = full_grid %>% filter(category == "occurred"),
    x = ~loser_score,
    y = ~winner_score,
    z = ~total_games,
    type = "heatmap",
    colorscale = "Viridis",
    showscale = TRUE,
    colorbar = list(title = "Number<br>of Games"),
    visible = "legendonly",
    name = "Legend"
  )

heatmap_fig

Interactive 3D Scorigami Visualization

# Prepare data for plotting - separate occurred vs not occurred
occurred_scores <- score_tally %>% filter(has_occurred)
not_occurred_scores <- score_tally %>% filter(!has_occurred)

# Create hover text
occurred_scores <- occurred_scores %>%
  mutate(
    hover_text = sprintf(
      "Score: %d-%d<br>Total Games: %d<br>Regular Season: %d<br>Playoff: %d<br>Margin: %d",
      winner_score, loser_score, total_games, regular_season_games, playoff_games, margin
    )
  )

not_occurred_scores <- not_occurred_scores %>%
  mutate(
    hover_text = sprintf("Score: %d-%d<br>NEVER OCCURRED<br>Margin: %d",
                        winner_score, loser_score, margin)
  )

# Create 3D scatter plot with Plotly
fig <- plot_ly()

# Add occurred scores
fig <- fig %>%
  add_trace(
    data = occurred_scores,
    x = ~loser_score,
    y = ~winner_score,
    z = ~total_games,
    type = "scatter3d",
    mode = "markers",
    marker = list(
      size = 3,
      color = ~total_games,
      colorscale = "Viridis",
      showscale = TRUE,
      colorbar = list(title = "Frequency")
    ),
    text = ~hover_text,
    hoverinfo = "text",
    name = "Occurred Scores"
  )

# Add not occurred scores as small gray points at z=0
fig <- fig %>%
  add_trace(
    data = not_occurred_scores %>% sample_n(min(1000, nrow(not_occurred_scores))),  # Sample for performance
    x = ~loser_score,
    y = ~winner_score,
    z = 0,
    type = "scatter3d",
    mode = "markers",
    marker = list(
      size = 1,
      color = "lightgray",
      opacity = 0.3
    ),
    text = ~hover_text,
    hoverinfo = "text",
    name = "Never Occurred (sample)",
    showlegend = TRUE
  )

# Configure layout
fig <- fig %>%
  layout(
    title = "NBA Scorigami - 3D Score Frequency Visualization",
    scene = list(
      xaxis = list(title = "Losing Team's Score"),
      yaxis = list(title = "Winning Team's Score"),
      zaxis = list(title = "Frequency (Number of Games)"),
      camera = list(
        eye = list(x = 1.5, y = 1.5, z = 1.3)
      )
    )
  )

fig

Most Common Scores

most_common <- occurred_scores %>%
  arrange(desc(total_games)) %>%
  dplyr::select(winner_score, loser_score, total_games, regular_season_games, playoff_games, margin) %>%
  head(20)

knitr::kable(most_common, 
             col.names = c("Winner", "Loser", "Total", "Regular Season", "Playoff", "Margin"),
             caption = "20 Most Common Score Combinations")
20 Most Common Score Combinations
Winner Loser Total Regular Season Playoff Margin
108 101 166 156 10 7
104 101 160 151 9 3
104 98 158 146 12 6
105 100 158 151 7 5
106 104 158 151 7 2
100 98 155 147 8 2
108 103 154 144 10 5
105 99 153 141 12 6
100 93 152 141 11 7
106 101 152 141 11 5
105 103 152 144 8 2
106 103 152 142 10 3
102 100 150 145 5 2
99 94 149 142 7 5
106 98 148 141 7 8
101 99 148 139 9 2
104 102 148 139 9 2
105 98 147 140 7 7
106 102 147 139 8 4
103 96 146 136 10 7

Rarest Scores (That Have Occurred)

rarest <- occurred_scores %>%
  filter(total_games == 1) %>%
  arrange(desc(winner_score), desc(margin)) %>%
  dplyr::select(winner_score, loser_score, total_games, game_type_label = regular_season_games, margin) %>%
  head(20)

knitr::kable(rarest,
             col.names = c("Winner", "Loser", "Total", "Was Regular Season?", "Margin"),
             caption = "Sample of Scores That Have Occurred Only Once")
Sample of Scores That Have Occurred Only Once
Winner Loser Total Was Regular Season? Margin
186 184 1 1 2
176 175 1 1 1
173 139 1 1 34
173 143 1 1 30
171 166 1 1 5
169 147 1 1 22
168 116 1 1 52
168 161 1 1 7
165 151 1 1 14
163 125 1 1 38
163 148 1 1 15
163 155 1 1 8
162 99 1 1 63
162 100 1 1 62
162 109 1 1 53
162 135 1 1 27
162 143 1 1 19
162 158 1 1 4
161 133 1 1 28
161 153 1 1 8

Most Likely Scores That Have Never Occurred

Using a simple probability model based on score distributions:

library(MASS)

# Fit distributions (only on occurred scores)
winner_fit <- fitdistr(all_games$winner_score, "normal")
loser_fit <- fitdistr(all_games$loser_score, "normal")

# Calculate approximate likelihood for each score combination
score_tally <- score_tally %>%
  mutate(
    likelihood = dnorm(winner_score, winner_fit$estimate[1], winner_fit$estimate[2]) *
                 dnorm(loser_score, loser_fit$estimate[1], loser_fit$estimate[2])
  )

# Most likely scores that haven't occurred
most_likely_missing <- score_tally %>%
  filter(!has_occurred) %>%
  arrange(desc(likelihood)) %>%
  dplyr::select(winner_score, loser_score, margin, likelihood) %>%
  head(20)

knitr::kable(most_likely_missing,
             col.names = c("Winner", "Loser", "Margin", "Likelihood Score"),
             caption = "Most Likely Scores That Have Never Occurred",
             digits = 6)
Most Likely Scores That Have Never Occurred
Winner Loser Margin Likelihood Score
126 90 36 0.000346
124 83 41 0.000248
128 87 41 0.000237
122 81 41 0.000236
131 93 38 0.000230
126 84 42 0.000228
125 83 42 0.000228
127 85 42 0.000225
130 89 41 0.000216
122 80 42 0.000212
125 82 43 0.000207
131 90 41 0.000202
119 78 41 0.000202
129 86 43 0.000198
113 76 37 0.000194
131 89 42 0.000191
120 78 42 0.000191
118 77 41 0.000187
129 85 44 0.000183
121 78 43 0.000179

Score Distribution by Game Type

game_type_summary <- all_games %>%
  group_by(game_type_label) %>%
  summarise(
    n_games = n(),
    mean_winner = mean(winner_score),
    mean_loser = mean(loser_score),
    mean_margin = mean(winner_score - loser_score),
    .groups = "drop"
  )

knitr::kable(game_type_summary, digits = 2,
             caption = "Score Statistics by Game Type")
Score Statistics by Game Type
game_type_label n_games mean_winner mean_loser mean_margin
Playoff 4441 106.87 95.90 10.97
Regular Season 67438 109.73 98.75 10.98