For this project, I wanted to showcase the diversity that is found in the modern NBA. Through three players, all with different archetypes, I show that here. The players are Kevin Durant of the Brooklyn Nets, Luke Kennard of the Los Angeles Clippers and Nic Claxton of the Brooklyn Nets. You will see below that each player has very distinct areas where they are effective. Or, in Durant’s case, just a few areas where he is less effective. ` Code and the shot charts are below.

library(tidyverse)
library(httr)
library(hexbin)
library(jsonlite)
library(scales)


percent_formatter = function(x) {
  scales::percent(x, accuracy = 1)
}


players_url = "http://stats.nba.com/stats/commonallplayers?LeagueID=00&Season=2019-20&IsOnlyCurrentSeason=0"

request_headers = c(
  `Connection` = 'keep-alive',
  `Accept` = 'application/json, text/plain, */*',
  `x-nba-stats-token` = 'true',
  `X-NewRelic-ID` = 'VQECWF5UChAHUlNTBwgBVw==',
  `User-Agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36',
  `x-nba-stats-origin` = 'stats',
  `Sec-Fetch-Site` = 'same-origin',
  `Sec-Fetch-Mode` = 'cors',
  `Referer` = 'https://stats.nba.com/players/leaguedashplayerbiostats/',
  `Accept-Encoding` = 'gzip, deflate, br',
  `Accept-Language` = 'en-US,en;q=0.9'
)

request = GET(players_url, add_headers(request_headers))

players_data = fromJSON(content(request, as = "text"))
players = tbl_df(data.frame(players_data$resultSets$rowSet[[1]], stringsAsFactors = FALSE))
names(players) = tolower(players_data$resultSets$headers[[1]])

players = mutate(players,
                 person_id = as.numeric(person_id),
                 rosterstatus = as.logical(as.numeric(rosterstatus)),
                 from_year = as.numeric(from_year),
                 to_year = as.numeric(to_year),
                 team_id = as.numeric(team_id)
)



if (Sys.Date() <= as.Date("2017-10-20")) {
  players = mutate(players, to_year = pmin(to_year, 2016))
}

players$name = sapply(players$display_last_comma_first, function(s) {
  paste(rev(strsplit(s, ", ")[[1]]), collapse = " ")
})

first_year_of_data = 1996
last_year_of_data = max(players$to_year)
season_strings = paste(first_year_of_data:last_year_of_data,
                       substr(first_year_of_data:last_year_of_data + 1, 3, 4),
                       sep = "-")

names(season_strings) = first_year_of_data:last_year_of_data

available_players = filter(players, to_year >= first_year_of_data)

names_table = table(available_players$name)
dupe_names = names(names_table[which(names_table > 1)])

available_players$name[available_players$name %in% dupe_names] = paste(
  available_players$name[available_players$name %in% dupe_names],
  available_players$person_id[available_players$name %in% dupe_names]
)

available_players$lower_name = tolower(available_players$name)
available_players = arrange(available_players, lower_name)

find_player_by_name = function(n) {
  filter(available_players, lower_name == tolower(n))
}

find_player_id_by_name = function(n) {
  find_player_by_name(n)$person_id
}







circle_points = function(center = c(0, 0), radius = 1, npoints = 360) {
  angles = seq(0, 2 * pi, length.out = npoints)
  return(data_frame(x = center[1] + radius * cos(angles),
                    y = center[2] + radius * sin(angles)))
}

width = 50
height = 94 / 2
key_height = 19
inner_key_width = 12
outer_key_width = 16
backboard_width = 6
backboard_offset = 4
neck_length = 0.5
hoop_radius = 0.75
hoop_center_y = backboard_offset + neck_length + hoop_radius
three_point_radius = 23.75
three_point_side_radius = 22
three_point_side_height = 14

court_themes = list(
  light = list(
    court = 'floralwhite',
    lines = '#999999',
    text = '#222222',
    made = '#00bfc4',
    missed = '#f8766d',
    hex_border_size = 1,
    hex_border_color = "#000000"
  ),
  dark = list(
    court = '#000004',
    lines = '#999999',
    text = '#f0f0f0',
    made = '#00bfc4',
    missed = '#f8766d',
    hex_border_size = 0,
    hex_border_color = "#000000"
  )
)


plot_court = function(court_theme = court_themes$light, use_short_three = FALSE) {
  if (use_short_three) {
    three_point_radius = 22
    three_point_side_height = 0
  }
  
  court_points = data_frame(
    x = c(width / 2, width / 2, -width / 2, -width / 2, width / 2),
    y = c(height, 0, 0, height, height),
    desc = "perimeter"
  )
  
  court_points = bind_rows(court_points , data_frame(
    x = c(outer_key_width / 2, outer_key_width / 2, -outer_key_width / 2, -outer_key_width / 2),
    y = c(0, key_height, key_height, 0),
    desc = "outer_key"
  ))
  
  court_points = bind_rows(court_points , data_frame(
    x = c(-backboard_width / 2, backboard_width / 2),
    y = c(backboard_offset, backboard_offset),
    desc = "backboard"
  ))
  
  court_points = bind_rows(court_points , data_frame(
    x = c(0, 0), y = c(backboard_offset, backboard_offset + neck_length), desc = "neck"
  ))
  
  foul_circle = circle_points(center = c(0, key_height), radius = inner_key_width / 2)
  
  foul_circle_top = filter(foul_circle, y > key_height) %>%
    mutate(desc = "foul_circle_top")
  
  foul_circle_bottom = filter(foul_circle, y < key_height) %>%
    mutate(
      angle = atan((y - key_height) / x) * 180 / pi,
      angle_group = floor((angle - 5.625) / 11.25),
      desc = paste0("foul_circle_bottom_", angle_group)
    ) %>%
    filter(angle_group %% 2 == 0) %>%
    select(x, y, desc)
  
  hoop = circle_points(center = c(0, hoop_center_y), radius = hoop_radius) %>%
    mutate(desc = "hoop")
  
  restricted = circle_points(center = c(0, hoop_center_y), radius = 4) %>%
    filter(y >= hoop_center_y) %>%
    mutate(desc = "restricted")
  
  three_point_circle = circle_points(center = c(0, hoop_center_y), radius = three_point_radius) %>%
    filter(y >= three_point_side_height, y >= hoop_center_y)
  
  three_point_line = data_frame(
    x = c(three_point_side_radius, three_point_side_radius, three_point_circle$x, -three_point_side_radius, -three_point_side_radius),
    y = c(0, three_point_side_height, three_point_circle$y, three_point_side_height, 0),
    desc = "three_point_line"
  )
  
  court_points = bind_rows(
    court_points,
    foul_circle_top,
    foul_circle_bottom,
    hoop,
    restricted,
    three_point_line
  )
  
  
  court_points <- court_points
  
  ggplot() +
    geom_path(
      data = court_points,
      aes(x = x, y = y, group = desc),
      color = court_theme$lines
    ) +
    coord_fixed(ylim = c(0, 45), xlim = c(-25, 25)) +
    theme_minimal(base_size = 22) +
    theme(
      text = element_text(color = court_theme$text),
      plot.background = element_rect(fill = 'floralwhite', color = 'floralwhite'),
      panel.background = element_rect(fill = court_theme$court, color = court_theme$court),
      panel.grid = element_blank(),
      panel.border = element_blank(),
      axis.text = element_blank(),
      axis.title = element_blank(),
      axis.ticks = element_blank(),
      legend.background = element_rect(fill = court_theme$court, color = court_theme$court),
      legend.margin = margin(-1, 0, 0, 0, unit = "lines"),
      legend.position = "bottom",
      legend.key = element_blank(),
      legend.text = element_text(size = rel(1.0))
    )
}

plot_court(court_themes$light)

fetch_shots_by_player_id_and_season = function(player_id, season, season_type) {
  
  request = GET(
    "http://stats.nba.com/stats/shotchartdetail",
    query = list(
      PlayerID = player_id,
      Season = season,
      SeasonType = season_type,
      PlayerPosition = "",
      ContextMeasure = "FGA",
      DateFrom = "",
      DateTo = "",
      GameID = "",
      GameSegment = "",
      LastNGames = 0,
      LeagueID = "00",
      Location = "",
      Month = 0,
      OpponentTeamID = 0,
      Outcome = "",
      Period = 0,
      Position = "",
      RookieYear = "",
      SeasonSegment = "",
      TeamID = 0,
      VsConference = "",
      VsDivision = ""
    ),
    add_headers(request_headers)
  )
  
  stop_for_status(request)
  
  data = content(request)
  
  raw_shots_data = data$resultSets[[1]]$rowSet
  col_names = tolower(as.character(data$resultSets[[1]]$headers))
  
  if (length(raw_shots_data) == 0) {
    shots = data.frame(
      matrix(nrow = 0, ncol = length(col_names))
    )
  } else {
    shots = data.frame(
      matrix(
        unlist(raw_shots_data),
        ncol = length(col_names),
        byrow = TRUE
      )
    )
  }
  
  shots = tbl_df(shots)
  names(shots) = col_names
  
  shots = mutate(shots,
                 loc_x = as.numeric(as.character(loc_x)) / 10,
                 loc_y = as.numeric(as.character(loc_y)) / 10 + hoop_center_y,
                 shot_distance = as.numeric(as.character(shot_distance)),
                 shot_made_numeric = as.numeric(as.character(shot_made_flag)),
                 shot_made_flag = factor(shot_made_flag, levels = c("1", "0"), labels = c("made", "missed")),
                 shot_attempted_flag = as.numeric(as.character(shot_attempted_flag)),
                 shot_value = ifelse(tolower(shot_type) == "3pt field goal", 3, 2),
                 game_date = as.Date(game_date, format = "%Y%m%d")
  )
  
  raw_league_avg_data = data$resultSets[[2]]$rowSet
  league_avg_names = tolower(as.character(data$resultSets[[2]]$headers))
  league_averages = tbl_df(data.frame(
    matrix(unlist(raw_league_avg_data), ncol = length(league_avg_names), byrow = TRUE)
  ))
  names(league_averages) = league_avg_names
  league_averages = mutate(league_averages,
                           fga = as.numeric(as.character(fga)),
                           fgm = as.numeric(as.character(fgm)),
                           fg_pct = as.numeric(as.character(fg_pct)),
                           shot_value = ifelse(shot_zone_basic %in% c("Above the Break 3", "Backcourt", "Left Corner 3", "Right Corner 3"), 3, 2)
  )
  
  return(list(player = shots, league_averages = league_averages))
}






hex_bounds <- function(x, binwidth) {
  c(
    plyr::round_any(min(x), binwidth, floor) - 1e-6,
    plyr::round_any(max(x), binwidth, ceiling) + 1e-6
  )
}

calculate_hex_coords = function(shots, binwidths) {
  xbnds = hex_bounds(shots$loc_x, binwidths[1])
  xbins = diff(xbnds) / binwidths[1]
  ybnds = hex_bounds(shots$loc_y, binwidths[2])
  ybins = diff(ybnds) / binwidths[2]
  
  hb = hexbin(
    x = shots$loc_x,
    y = shots$loc_y,
    xbins = xbins,
    xbnds = xbnds,
    ybnds = ybnds,
    shape = ybins / xbins,
    IDs = TRUE
  )
  
  shots = mutate(shots, hexbin_id = hb@cID)
  
  hexbin_stats = shots %>%
    group_by(hexbin_id) %>%
    summarize(
      hex_attempts = n(),
      hex_pct = mean(shot_made_numeric),
      hex_points_scored = sum(shot_made_numeric * shot_value),
      hex_points_per_shot = mean(shot_made_numeric * shot_value)
    ) 
  
  hexbin_ids_to_zones = shots %>%
    group_by(hexbin_id, shot_zone_range, shot_zone_area) %>%
    summarize(attempts = n()) %>%
    ungroup() %>%
    arrange(hexbin_id, desc(attempts)) %>%
    group_by(hexbin_id) %>%
    filter(row_number() == 1) %>%
    select(hexbin_id, shot_zone_range, shot_zone_area)
  
  hexbin_stats = inner_join(hexbin_stats, hexbin_ids_to_zones, by = "hexbin_id")
  
  # from hexbin package, see: https://github.com/edzer/hexbin
  sx = hb@xbins / diff(hb@xbnds)
  sy = (hb@xbins * hb@shape) / diff(hb@ybnds)
  dx = 1 / (2 * sx)
  dy = 1 / (2 * sqrt(3) * sy)
  origin_coords = hexcoords(dx, dy)
  
  hex_centers = hcell2xy(hb)
  
  hexbin_coords = bind_rows(lapply(1:hb@ncells, function(i) {
    data.frame(
      x = origin_coords$x + hex_centers$x[i],
      y = origin_coords$y + hex_centers$y[i],
      center_x = hex_centers$x[i],
      center_y = hex_centers$y[i],
      hexbin_id = hb@cell[i]
    )
  }))
  
  inner_join(hexbin_coords, hexbin_stats, by = "hexbin_id")
}



calculate_hexbins_from_shots = function(shots, league_averages, binwidths, min_radius_factor, fg_diff_limits, fg_pct_limits, pps_limits) {
  if (nrow(shots) == 0) {
    return(list())
  }
  
  
  
  
  grouped_shots = group_by(shots, shot_zone_range, shot_zone_area)
  
  zone_stats = grouped_shots %>%
    summarize(
      zone_attempts = n(),
      zone_pct = mean(shot_made_numeric),
      zone_points_scored = sum(shot_made_numeric * shot_value),
      zone_points_per_shot = mean(shot_made_numeric * shot_value)
    )
  
  league_zone_stats = league_averages %>%
    group_by(shot_zone_range, shot_zone_area) %>%
    summarize(league_pct = sum(fgm) / sum(fga))
  
  hex_data = calculate_hex_coords(shots, binwidths = binwidths)
  
  join_keys = c("shot_zone_area", "shot_zone_range")
  
  hex_data = hex_data %>%
    inner_join(zone_stats, by = join_keys) %>%
    inner_join(league_zone_stats, by = join_keys)
  
  max_hex_attempts = max(hex_data$hex_attempts)
  
  
  hex_data = mutate(hex_data,
                    radius_factor = min_radius_factor + (1 - min_radius_factor) * log(hex_attempts + 1) / log(max_hex_attempts + 1),
                    adj_x = center_x + radius_factor * (x - center_x),
                    adj_y = center_y + radius_factor * (y - center_y),
                    bounded_fg_diff = pmin(pmax(zone_pct - league_pct, fg_diff_limits[1]), fg_diff_limits[2]),
                    bounded_fg_pct = pmin(pmax(zone_pct, fg_pct_limits[1]), fg_pct_limits[2]),
                    bounded_points_per_shot = pmin(pmax(zone_points_per_shot, pps_limits[1]), pps_limits[2]))
  
  list(hex_data = hex_data, fg_diff_limits = fg_diff_limits, fg_pct_limits = fg_pct_limits, pps_limits = pps_limits)
  
}



get_data <- function(id, seasons, season_type) {
  
  df <- fetch_shots_by_player_id_and_season(id, seasons, season_type)
  
  shots <- as.data.frame(df[1])
  league_averages <- as.data.frame(df[2])
  
  
  names(shots) <- sub(".*\\.", "", names(shots))
  names(league_averages) <- sub(".*\\.", "", names(league_averages))
  
  
  hex_data <- calculate_hexbins_from_shots(shots, league_averages,binwidths = c(1.5, 1.5), min_radius_factor = .25, fg_diff_limits = c(-0.15, 0.15), fg_pct_limits = c(0.2, 0.7), pps_limits = c(0.5, 1.5))  
  
  df <- hex_data
  
  df <- as.data.frame(df[1])
  
  df$season <- seasons
  df$person_id <- id
  
  
  names(df) <- sub(".*\\.", "", names(df))
  
  return(df)
  
}


df <- get_data(201142, "2022-23", "Regular Season")


library(prismatic)
library(extrafont)
library(cowplot)

p <- plot_court(court_themes$light) +
  geom_polygon(
    data = df,
    aes(
      x = adj_x,
      y = adj_y,
      group = hexbin_id, 
      fill = bounded_fg_diff, 
      color = after_scale(clr_darken(fill, .333))),
    size = .25) + 
  scale_x_continuous(limits = c(-27.5, 27.5)) + 
  scale_y_continuous(limits = c(0, 45)) +
  scale_fill_distiller(direction = -1, 
                       palette = "RdBu", 
                       limits = c(-.15, .15), 
                       breaks = seq(-.15, .15, .03),
                       labels = c("-15%", "-12%", "-9%", "-6%", "-3%", "0%", "+3%", "+6%", "+9%", "+12%", "+15%"),
                       "FG Percentage Points vs. League Average") +
  guides(fill=guide_legend(
    label.position = 'bottom', 
    title.position = 'top', 
    keywidth=.45,
    keyheight=.15, 
    default.unit="inch", 
    title.hjust = .5,
    title.vjust = 0,
    label.vjust = 3,
    nrow = 1))  +
  theme(text=element_text(size=14,  family="Gill Sans MT"), 
        legend.spacing.x = unit(0, 'cm'), 
        legend.title=element_text(size=12), 
        legend.text = element_text(size = rel(0.6)), 
        legend.margin=margin(-10,0,-1,0),
        legend.position = 'bottom',
        legend.box.margin=margin(-30,0,15,0), 
        plot.title = element_text(hjust = 0.5, vjust = -1, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 9, vjust = -.5), 
        plot.caption = element_text(face = "italic", size = 8), 
        plot.margin = margin(0, -5, 0, -5, "cm")) +
  labs(title = "Kevin Durant",
       subtitle = "2022-2023 Regular Season")

ggdraw(p) + 
  theme(plot.background = element_rect(fill="floralwhite", color = NA))

###

df <- get_data(1628379, "2022-23", "Regular Season")


library(prismatic)
library(extrafont)
library(cowplot)

p <- plot_court(court_themes$light) +
  geom_polygon(
    data = df,
    aes(
      x = adj_x,
      y = adj_y,
      group = hexbin_id, 
      fill = bounded_fg_diff, 
      color = after_scale(clr_darken(fill, .333))),
    size = .25) + 
  scale_x_continuous(limits = c(-27.5, 27.5)) + 
  scale_y_continuous(limits = c(0, 45)) +
  scale_fill_distiller(direction = -1, 
                       palette = "RdBu", 
                       limits = c(-.15, .15), 
                       breaks = seq(-.15, .15, .03),
                       labels = c("-15%", "-12%", "-9%", "-6%", "-3%", "0%", "+3%", "+6%", "+9%", "+12%", "+15%"),
                       "FG Percentage Points vs. League Average") +
  guides(fill=guide_legend(
    label.position = 'bottom', 
    title.position = 'top', 
    keywidth=.45,
    keyheight=.15, 
    default.unit="inch", 
    title.hjust = .5,
    title.vjust = 0,
    label.vjust = 3,
    nrow = 1))  +
  theme(text=element_text(size=14,  family="Gill Sans MT"), 
        legend.spacing.x = unit(0, 'cm'), 
        legend.title=element_text(size=12), 
        legend.text = element_text(size = rel(0.6)), 
        legend.margin=margin(-10,0,-1,0),
        legend.position = 'bottom',
        legend.box.margin=margin(-30,0,15,0), 
        plot.title = element_text(hjust = 0.5, vjust = -1, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 9, vjust = -.5), 
        plot.caption = element_text(face = "italic", size = 8), 
        plot.margin = margin(0, -5, 0, -5, "cm")) +
  labs(title = "Luke Kennard",
       subtitle = "2022-2023 Regular Season")

ggdraw(p) + 
  theme(plot.background = element_rect(fill="floralwhite", color = NA))

###

df <- get_data(1629651, "2022-23", "Regular Season")


library(prismatic)
library(extrafont)
library(cowplot)

p <- plot_court(court_themes$light) +
  geom_polygon(
    data = df,
    aes(
      x = adj_x,
      y = adj_y,
      group = hexbin_id, 
      fill = bounded_fg_diff, 
      color = after_scale(clr_darken(fill, .333))),
    size = .25) + 
  scale_x_continuous(limits = c(-27.5, 27.5)) + 
  scale_y_continuous(limits = c(0, 45)) +
  scale_fill_distiller(direction = -1, 
                       palette = "RdBu", 
                       limits = c(-.15, .15), 
                       breaks = seq(-.15, .15, .03),
                       labels = c("-15%", "-12%", "-9%", "-6%", "-3%", "0%", "+3%", "+6%", "+9%", "+12%", "+15%"),
                       "FG Percentage Points vs. League Average") +
  guides(fill=guide_legend(
    label.position = 'bottom', 
    title.position = 'top', 
    keywidth=.45,
    keyheight=.15, 
    default.unit="inch", 
    title.hjust = .5,
    title.vjust = 0,
    label.vjust = 3,
    nrow = 1))  +
  theme(text=element_text(size=14,  family="Gill Sans MT"), 
        legend.spacing.x = unit(0, 'cm'), 
        legend.title=element_text(size=12), 
        legend.text = element_text(size = rel(0.6)), 
        legend.margin=margin(-10,0,-1,0),
        legend.position = 'bottom',
        legend.box.margin=margin(-30,0,15,0), 
        plot.title = element_text(hjust = 0.5, vjust = -1, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 9, vjust = -.5), 
        plot.caption = element_text(face = "italic", size = 8), 
        plot.margin = margin(0, -5, 0, -5, "cm")) +
  labs(title = "Nic Claxton",
       subtitle = "2022-2023 Regular Season")

ggdraw(p) + 
  theme(plot.background = element_rect(fill="floralwhite", color = NA))