NBA Scorigami Analysis
Inspired by the NFL Scorigami concept, this visualization shows every
final score combination that has occurred in NBA history, with the goal
of identifying scores that have never happened.
Original
idea
# Run this once to install required packages
install.packages(c("plotly", "ggplot2", "dplyr", "viridis", "MASS"))
library(ggplot2)
library(plotly)
library(dplyr)
library(viridis)
library(MASS)
Load and Prepare Data
all_games <- read.csv("games.csv")
# Determine winner and loser scores for each game
all_games <- all_games %>%
mutate(
winner_score = pmax(homeScore, awayScore),
loser_score = pmin(homeScore, awayScore),
game_type_label = ifelse(grepl("Playoff", gameType, ignore.case = TRUE), "Playoff", "Regular Season")
)
# Check data
head(all_games)
## gameId gameDate hometeamCity hometeamName hometeamId
## 1 42400407 2025-06-22 20:00:00 Oklahoma City Thunder 1610612760
## 2 42400406 2025-06-19 20:30:00 Indiana Pacers 1610612754
## 3 42400405 2025-06-16 20:30:00 Oklahoma City Thunder 1610612760
## 4 42400404 2025-06-13 20:30:00 Indiana Pacers 1610612754
## 5 42400403 2025-06-11 20:30:00 Indiana Pacers 1610612754
## 6 42400402 2025-06-08 20:00:00 Oklahoma City Thunder 1610612760
## awayteamCity awayteamName awayteamId homeScore awayScore winner gameType
## 1 Indiana Pacers 1610612754 103 91 1610612760 Playoffs
## 2 Oklahoma City Thunder 1610612760 108 91 1610612754 Playoffs
## 3 Indiana Pacers 1610612754 120 109 1610612760 Playoffs
## 4 Oklahoma City Thunder 1610612760 104 111 1610612760 Playoffs
## 5 Oklahoma City Thunder 1610612760 116 107 1610612754 Playoffs
## 6 Indiana Pacers 1610612754 123 107 1610612760 Playoffs
## attendance arenaId gameLabel gameSubLabel seriesGameNumber winner_score
## 1 18203 1000052 NBA Finals Game 7 7 103
## 2 17274 1000063 NBA Finals Game 6 6 108
## 3 18203 1000052 NBA Finals Game 5 5 120
## 4 17274 1000063 NBA Finals Game 4 4 111
## 5 17274 1000063 NBA Finals Game 3 3 116
## 6 18203 1000052 NBA Finals Game 2 2 123
## loser_score game_type_label
## 1 91 Playoff
## 2 91 Playoff
## 3 109 Playoff
## 4 104 Playoff
## 5 107 Playoff
## 6 107 Playoff
summary(all_games[, c("winner_score", "loser_score", "gameType")])
## winner_score loser_score gameType
## Min. : 0.0 Min. : 0.00 Length:71879
## 1st Qu.:101.0 1st Qu.: 90.00 Class :character
## Median :109.0 Median : 98.00 Mode :character
## Mean :109.5 Mean : 98.57
## 3rd Qu.:118.0 3rd Qu.:107.00
## Max. :186.0 Max. :184.00
table(all_games$game_type_label)
##
## Playoff Regular Season
## 4441 67438
Create Score Frequency Table
# Create a comprehensive grid of all possible scores
min_score <- 19
max_score <- 186
# Count occurrences of each score combination
score_combinations <- all_games %>%
group_by(winner_score, loser_score) %>%
summarise(
total_games = n(),
regular_season_games = sum(game_type_label == "Regular Season"),
playoff_games = sum(game_type_label == "Playoff"),
.groups = "drop"
)
# Create full grid including scores that have never happened
all_possible_scores <- expand.grid(
winner_score = min_score:max_score,
loser_score = min_score:max_score
) %>%
filter(winner_score > loser_score) # Winner must have more points
# Merge with actual data
score_tally <- all_possible_scores %>%
left_join(score_combinations, by = c("winner_score", "loser_score")) %>%
mutate(
total_games = ifelse(is.na(total_games), 0, total_games),
regular_season_games = ifelse(is.na(regular_season_games), 0, regular_season_games),
playoff_games = ifelse(is.na(playoff_games), 0, playoff_games),
has_occurred = total_games > 0,
margin = winner_score - loser_score
)
# Summary statistics
cat(sprintf("Total possible score combinations: %d\n", nrow(score_tally)))
## Total possible score combinations: 14028
cat(sprintf("Scores that have occurred: %d\n", sum(score_tally$has_occurred)))
## Scores that have occurred: 3013
cat(sprintf("Scores that have NOT occurred: %d\n", sum(!score_tally$has_occurred)))
## Scores that have NOT occurred: 11015
cat(sprintf("Percentage of possible scores achieved: %.1f%%\n",
100 * sum(score_tally$has_occurred) / nrow(score_tally)))
## Percentage of possible scores achieved: 21.5%
2D Heatmap Preview
# Create complete grid with all three categories
full_grid <- expand.grid(
loser_score = min_score:max_score,
winner_score = min_score:max_score
) %>%
left_join(score_tally, by = c("loser_score", "winner_score")) %>%
mutate(
# Categorize each cell
category = case_when(
loser_score >= winner_score ~ "impossible",
is.na(has_occurred) | !has_occurred ~ "never_happened",
TRUE ~ "occurred"
),
# Create display value: use actual frequency for occurred, -1 for never happened, -2 for impossible
display_value = case_when(
category == "occurred" ~ total_games,
category == "never_happened" ~ -1,
category == "impossible" ~ -2
),
# Create hover text
hover_text = case_when(
category == "occurred" ~ sprintf(
"Winner: %d | Loser: %d<br>Total Games: %d<br>Regular Season: %d | Playoff: %d<br>Margin: %d",
winner_score, loser_score, total_games, regular_season_games, playoff_games, margin
),
category == "never_happened" ~ sprintf(
"Winner: %d | Loser: %d<br><b>NEVER HAPPENED</b><br>Margin: %d",
winner_score, loser_score, winner_score - loser_score
),
category == "impossible" ~ sprintf(
"Winner: %d | Loser: %d<br><b>IMPOSIBLE SCORE</b>",
winner_score, loser_score
)
),
# Replace NA values
total_games = ifelse(is.na(total_games), 0, total_games),
regular_season_games = ifelse(is.na(regular_season_games), 0, regular_season_games),
playoff_games = ifelse(is.na(playoff_games), 0, playoff_games)
)
# Get max value for color scale
max_games <- max(full_grid$total_games[full_grid$category == "occurred"])
# Create custom colorscale: black for -2, white for -1, viridis for 0+
custom_colorscale <- list(
c(0, "black"), # -2 (impossible)
c(0.00001, "black"),
c(0.00002, "white"), # -1 (never happened)
c(0.49999, "white"),
c(0.5, "#440154"), # Start of viridis (0 games)
c(0.625, "#31688e"),
c(0.75, "#35b779"),
c(0.875, "#fde724"),
c(1, "#fde724") # Max games
)
# Normalize display values to 0-1 range for color mapping
full_grid <- full_grid %>%
mutate(
normalized_value = case_when(
display_value == -2 ~ 0,
display_value == -1 ~ 0.25,
TRUE ~ 0.5 + 0.5 * (display_value / max_games)
)
)
heatmap_fig <- plot_ly(
data = full_grid,
x = ~loser_score,
y = ~winner_score,
z = ~normalized_value,
type = "heatmap",
colorscale = custom_colorscale,
text = ~hover_text,
hoverinfo = "text",
showscale = FALSE
) %>%
layout(
title = "NBA Score Frequency Heatmap (white = never happened, black = impossible)",
xaxis = list(title = "Losing Team's Score"),
yaxis = list(title = "Winning Team's Score")
)
# Add a separate color bar for the actual game frequencies
heatmap_fig <- heatmap_fig %>%
add_trace(
data = full_grid %>% filter(category == "occurred"),
x = ~loser_score,
y = ~winner_score,
z = ~total_games,
type = "heatmap",
colorscale = "Viridis",
showscale = TRUE,
colorbar = list(title = "Number<br>of Games"),
visible = "legendonly",
name = "Legend"
)
heatmap_fig
Interactive 3D Scorigami Visualization
# Prepare data for plotting - separate occurred vs not occurred
occurred_scores <- score_tally %>% filter(has_occurred)
not_occurred_scores <- score_tally %>% filter(!has_occurred)
# Create hover text
occurred_scores <- occurred_scores %>%
mutate(
hover_text = sprintf(
"Score: %d-%d<br>Total Games: %d<br>Regular Season: %d<br>Playoff: %d<br>Margin: %d",
winner_score, loser_score, total_games, regular_season_games, playoff_games, margin
)
)
not_occurred_scores <- not_occurred_scores %>%
mutate(
hover_text = sprintf("Score: %d-%d<br>NEVER OCCURRED<br>Margin: %d",
winner_score, loser_score, margin)
)
# Create 3D scatter plot with Plotly
fig <- plot_ly()
# Add occurred scores
fig <- fig %>%
add_trace(
data = occurred_scores,
x = ~loser_score,
y = ~winner_score,
z = ~total_games,
type = "scatter3d",
mode = "markers",
marker = list(
size = 3,
color = ~total_games,
colorscale = "Viridis",
showscale = TRUE,
colorbar = list(title = "Frequency")
),
text = ~hover_text,
hoverinfo = "text",
name = "Occurred Scores"
)
# Add not occurred scores as small gray points at z=0
fig <- fig %>%
add_trace(
data = not_occurred_scores %>% sample_n(min(1000, nrow(not_occurred_scores))), # Sample for performance
x = ~loser_score,
y = ~winner_score,
z = 0,
type = "scatter3d",
mode = "markers",
marker = list(
size = 1,
color = "lightgray",
opacity = 0.3
),
text = ~hover_text,
hoverinfo = "text",
name = "Never Occurred (sample)",
showlegend = TRUE
)
# Configure layout
fig <- fig %>%
layout(
title = "NBA Scorigami - 3D Score Frequency Visualization",
scene = list(
xaxis = list(title = "Losing Team's Score"),
yaxis = list(title = "Winning Team's Score"),
zaxis = list(title = "Frequency (Number of Games)"),
camera = list(
eye = list(x = 1.5, y = 1.5, z = 1.3)
)
)
)
fig
Most Common Scores
most_common <- occurred_scores %>%
arrange(desc(total_games)) %>%
dplyr::select(winner_score, loser_score, total_games, regular_season_games, playoff_games, margin) %>%
head(20)
knitr::kable(most_common,
col.names = c("Winner", "Loser", "Total", "Regular Season", "Playoff", "Margin"),
caption = "20 Most Common Score Combinations")
20 Most Common Score Combinations
| Winner |
Loser |
Total |
Regular Season |
Playoff |
Margin |
| 108 |
101 |
166 |
156 |
10 |
7 |
| 104 |
101 |
160 |
151 |
9 |
3 |
| 104 |
98 |
158 |
146 |
12 |
6 |
| 105 |
100 |
158 |
151 |
7 |
5 |
| 106 |
104 |
158 |
151 |
7 |
2 |
| 100 |
98 |
155 |
147 |
8 |
2 |
| 108 |
103 |
154 |
144 |
10 |
5 |
| 105 |
99 |
153 |
141 |
12 |
6 |
| 100 |
93 |
152 |
141 |
11 |
7 |
| 106 |
101 |
152 |
141 |
11 |
5 |
| 105 |
103 |
152 |
144 |
8 |
2 |
| 106 |
103 |
152 |
142 |
10 |
3 |
| 102 |
100 |
150 |
145 |
5 |
2 |
| 99 |
94 |
149 |
142 |
7 |
5 |
| 106 |
98 |
148 |
141 |
7 |
8 |
| 101 |
99 |
148 |
139 |
9 |
2 |
| 104 |
102 |
148 |
139 |
9 |
2 |
| 105 |
98 |
147 |
140 |
7 |
7 |
| 106 |
102 |
147 |
139 |
8 |
4 |
| 103 |
96 |
146 |
136 |
10 |
7 |
Rarest Scores (That Have Occurred)
rarest <- occurred_scores %>%
filter(total_games == 1) %>%
arrange(desc(winner_score), desc(margin)) %>%
dplyr::select(winner_score, loser_score, total_games, game_type_label = regular_season_games, margin) %>%
head(20)
knitr::kable(rarest,
col.names = c("Winner", "Loser", "Total", "Was Regular Season?", "Margin"),
caption = "Sample of Scores That Have Occurred Only Once")
Sample of Scores That Have Occurred Only Once
| Winner |
Loser |
Total |
Was Regular Season? |
Margin |
| 186 |
184 |
1 |
1 |
2 |
| 176 |
175 |
1 |
1 |
1 |
| 173 |
139 |
1 |
1 |
34 |
| 173 |
143 |
1 |
1 |
30 |
| 171 |
166 |
1 |
1 |
5 |
| 169 |
147 |
1 |
1 |
22 |
| 168 |
116 |
1 |
1 |
52 |
| 168 |
161 |
1 |
1 |
7 |
| 165 |
151 |
1 |
1 |
14 |
| 163 |
125 |
1 |
1 |
38 |
| 163 |
148 |
1 |
1 |
15 |
| 163 |
155 |
1 |
1 |
8 |
| 162 |
99 |
1 |
1 |
63 |
| 162 |
100 |
1 |
1 |
62 |
| 162 |
109 |
1 |
1 |
53 |
| 162 |
135 |
1 |
1 |
27 |
| 162 |
143 |
1 |
1 |
19 |
| 162 |
158 |
1 |
1 |
4 |
| 161 |
133 |
1 |
1 |
28 |
| 161 |
153 |
1 |
1 |
8 |
Most Likely Scores That Have Never Occurred
Using a simple probability model based on score distributions:
library(MASS)
# Fit distributions (only on occurred scores)
winner_fit <- fitdistr(all_games$winner_score, "normal")
loser_fit <- fitdistr(all_games$loser_score, "normal")
# Calculate approximate likelihood for each score combination
score_tally <- score_tally %>%
mutate(
likelihood = dnorm(winner_score, winner_fit$estimate[1], winner_fit$estimate[2]) *
dnorm(loser_score, loser_fit$estimate[1], loser_fit$estimate[2])
)
# Most likely scores that haven't occurred
most_likely_missing <- score_tally %>%
filter(!has_occurred) %>%
arrange(desc(likelihood)) %>%
dplyr::select(winner_score, loser_score, margin, likelihood) %>%
head(20)
knitr::kable(most_likely_missing,
col.names = c("Winner", "Loser", "Margin", "Likelihood Score"),
caption = "Most Likely Scores That Have Never Occurred",
digits = 6)
Most Likely Scores That Have Never Occurred
| Winner |
Loser |
Margin |
Likelihood Score |
| 126 |
90 |
36 |
0.000346 |
| 124 |
83 |
41 |
0.000248 |
| 128 |
87 |
41 |
0.000237 |
| 122 |
81 |
41 |
0.000236 |
| 131 |
93 |
38 |
0.000230 |
| 126 |
84 |
42 |
0.000228 |
| 125 |
83 |
42 |
0.000228 |
| 127 |
85 |
42 |
0.000225 |
| 130 |
89 |
41 |
0.000216 |
| 122 |
80 |
42 |
0.000212 |
| 125 |
82 |
43 |
0.000207 |
| 131 |
90 |
41 |
0.000202 |
| 119 |
78 |
41 |
0.000202 |
| 129 |
86 |
43 |
0.000198 |
| 113 |
76 |
37 |
0.000194 |
| 131 |
89 |
42 |
0.000191 |
| 120 |
78 |
42 |
0.000191 |
| 118 |
77 |
41 |
0.000187 |
| 129 |
85 |
44 |
0.000183 |
| 121 |
78 |
43 |
0.000179 |
Score Distribution by Game Type
game_type_summary <- all_games %>%
group_by(game_type_label) %>%
summarise(
n_games = n(),
mean_winner = mean(winner_score),
mean_loser = mean(loser_score),
mean_margin = mean(winner_score - loser_score),
.groups = "drop"
)
knitr::kable(game_type_summary, digits = 2,
caption = "Score Statistics by Game Type")
Score Statistics by Game Type
| game_type_label |
n_games |
mean_winner |
mean_loser |
mean_margin |
| Playoff |
4441 |
106.87 |
95.90 |
10.97 |
| Regular Season |
67438 |
109.73 |
98.75 |
10.98 |