snowflake

For this visualization project, I sought to display how the New York Knicks (My favorite team) shoots as a unit relative to league average. For this goal, I have created what is sometimes called a “snowflake chart.”
All data are from the 2021-22 season.
I give all the credit to Owen Philips whose code and tutorials made it possible for me to pursue this project.
Code can be found below.
setwd("C:/Users/shasu/OneDrive/Desktop/R Files")
library(tidyverse)
library(metR)
library(nbastatR)
library(extrafont)
library(teamcolors)
library(cowplot)

# Custom theme
theme_owen <- function () { 
  theme_minimal(base_size=11, base_family="Consolas") %+replace% 
    theme(
      panel.grid.minor = element_blank(),
      plot.background = element_rect(fill = 'floralwhite', color = "floralwhite")
    )
}

# Turn off scientific notation 
options(scipen=999)

# Get NBA team names
Sys.setenv(VROOM_CONNECTION_SIZE=500072)
tms <- nba_teams()
tms <- tms %>% filter(isNonNBATeam == 0) %>% select(nameTeam, idTeam, slugTeam)

# Get primary team colors 
tm.colors <- teamcolors::teamcolors
tm.colors <- tm.colors %>% filter(league == "nba")
tm.colors <- tm.colors %>% select(name, secondary)

# Manually adjust a few team colors
tm.colors <- tm.colors %>% 
  mutate(secondary = case_when(
    name == "Golden State Warriors" ~ "#1D428A",
    name == "Indiana Pacers" ~ "#002D62",
    name == "Los Angeles Lakers" ~ "#552583",
    name == "San Antonio Spurs" ~ "#000000",
    name == "Oklahoma City Thunder" ~ "#EF3B24",
    name == "Charlotte Hornets" ~ "#00788C",
    name == "Utah Jazz" ~ "#00471B",
    name == "New Orleans Pelicans" ~ "#0C2340",
    TRUE ~ secondary
  ))

# Load NBA court dimensions from Github
devtools::source_url("https://github.com/Henryjean/NBA-Court/blob/main/CourtDimensions.R?raw=TRUE")

df <- read.csv("shotData23.csv")

# Merge shot data with team names
df <- left_join(df, tms)

# clean up the shot data a bit to fit our court dimensions 
df <- df %>% mutate(locationX = as.numeric(as.character(locationX)) / 10,
                    locationY = as.numeric(as.character(locationY)) / 10 + hoop_center_y)

# Horizontally flip the data
df$locationX <- df$locationX * -1 

# Clean up the Clippers team name for merging purposes 
df <- df %>% 
  mutate(nameTeam = case_when(
    nameTeam == "LA Clippers" ~ "Los Angeles Clippers", 
    TRUE ~ nameTeam   
  )) %>% 
  mutate(slugTeam = case_when(
    nameTeam == "Los Angeles Clippers" ~ "LAC", 
    TRUE ~ slugTeam
  ))

# Filter out any shots greater than 35 feet (too noisy beyond that)
df <- df %>% filter(distanceShot <= 35)

get_density <- function(x, y, ...) {
  density_out <- MASS::kde2d(x, y, ...)
  int_x <- findInterval(x, density_out$x)
  int_y <- findInterval(y, density_out$y)
  comb_int <- cbind(int_x, int_y)
  return(density_out$z[comb_int])
}

# Pick a team of interest
team1 <- "New York Knicks"

# Decide how granular we want to go w/ our density calculation. I've found that 300 works pretty well, but note that it runs a bit slow
n <- 300

# filter data to our team of interest, assign it to tm1
tm1 <- df %>% 
  select(locationX, locationY, nameTeam) %>% 
  filter(nameTeam == team1)

# filter data for every team other than our team of interest, assign it to tm2
tm2 <- df %>% 
  select(locationX, locationY, nameTeam)%>% 
  filter(nameTeam != team1)

# get x/y coords as vectors
tm1_x <- pull(tm1, locationX)
tm1_y <- pull(tm1, locationY)

# get x/y coords as vectors
tm2_x <- pull(tm2, locationX)
tm2_y <- pull(tm2, locationY)

# get x and y range to compute comparisons across
x_rng = range(c(-25, 25))
y_rng = range(c(0, 52))

# Explicitly calculate bandwidth for future use
bandwidth_x <- MASS::bandwidth.nrd(c(tm1_x, tm2_x))
bandwidth_y <- MASS::bandwidth.nrd(c(tm1_y, tm2_y))

bandwidth_calc <- c(bandwidth_x, bandwidth_y)

# Calculate the density estimate over the specified x and y range
d2_tm1 = MASS::kde2d(tm1_x, tm1_y, h = bandwidth_calc, n=n, lims=c(x_rng, y_rng))
d2_tm2 = MASS::kde2d(tm2_x, tm2_y, h = bandwidth_calc, n=n, lims=c(x_rng, y_rng))

# Create a new dataframe that contains the difference in shot density between our two dataframes 
df_diff <- d2_tm1

# matrix subtraction density from tm1 from league average
df_diff$z <- d2_tm1$z - d2_tm2$z

# add matrix col names
colnames(df_diff$z) <- df_diff$y

# Convert list to dataframe with relevant variables and columns
df_diff <- df_diff$z %>% 
  as.data.frame() %>% 
  mutate(x_coord = df_diff$x) %>% 
  pivot_longer(-x_coord, names_to = "y_coord", values_to = "z") %>% 
  mutate(y_coord = as.double(y_coord),
         name = team1) %>% 
  ungroup()

##
df_diff <- left_join(df_diff, tm.colors, by = "name")

##
p <- ggplot() +
  geom_contour_fill(data = df_diff %>% filter(z >= mean(z)), aes(x = x_coord, y = y_coord, z = sqrt(z)))  +
  geom_contour_tanaka(data = df_diff, aes(x = x_coord, y = y_coord, z = sqrt(z)), bins = 5)  +
  coord_fixed(clip = 'off') +
  theme_owen() +
  scale_fill_gradient2(low = 'floralwhite', mid = 'floralwhite', high = df_diff$secondary[1]) +
  scale_y_continuous(limits = c(-2.5, 41)) +
  scale_x_continuous(limits = c(-30, 30)) +
  theme(legend.position = 'none',
        line = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(), 
        plot.margin = margin(.25, .25, .5, .25, "lines"), 
        plot.title = element_text(size = 24, hjust= .5, face = 'bold', margin = margin(t = -25), color = '#006bb6'),
        plot.subtitle = element_text(size = 10, hjust= .5, face = 'bold', margin = margin(t = 15)),
        plot.caption = element_text(hjust = .5, size = 12)) +
  geom_path(data = court_points,
            aes(x = x, y = y, group = desc, linetype = dash),
            color = "black", size = .25) +
  labs(title = df_diff$name[1], 
       subtitle = "Where the Knicks tend to shoot from relative to league average", 
       caption = "Darker areas indicate a higher frequency of shot attempts from that location relative to league average") +
  geom_curve(aes(x = 0, y = 32.5, xend = 2.5, yend = 14), curvature = -0.2, arrow = arrow(length = unit(0.03, "npc"))) + 
  annotate(geom = 'label', x = 0, y = 33.5, hjust = .5, label = "The Knicks like to shoot in the area between midrange and the rim", family = "Consolas", size = 3)

p <- cowplot::ggdraw(p) + 
  theme(plot.background = element_rect(fill="floralwhite", color = NA))
p
snowflake_chart

Shota Hasui

2023-01-25