In recent years, teams have wondered whether they should crash the paint for offensive rebounds. Doing so can lead to quality second chances for teams, but if too many players crash the paint, the opposing defense can take advantage of easier transition opportunities. Using purely descriptive values, this markdown tries to quantify this trade off and see which shot locations lead to frequent offensive rebounds while minimizing the damage from transition opportunities.
In an ideal world, we would have tracking information or know how many players are crashing for offensive rebounds on a given play. Unfortunately, the only information I have is shot locations and the list of ten players on the floor, so we will mainly be reviewing offensive rebounding statistics across shot locations.
knitr::opts_chunk$set(echo = TRUE, results = "show", message=F, warning=F, fig.width=8, fig.height=4)
library(ggplot2)
library(tidyverse)
library(nbastatR)
setwd("C:/Users/justi/OneDrive - PennO365/Moneyball")
pbp <- readRDS("data_2022.rds")
pbp$lineup_away <- gsub(", ", ",", pbp$lineup_away)
pbp$lineup_home <- gsub(", ", ",", pbp$lineup_home)
pbp <- pbp %>%
separate(lineup_home, c("h1", "h2", "h3", "h4", "h5"), sep = ',')
pbp <- pbp %>%
separate(lineup_away, c("a1", "a2", "a3", "a4", "a5"), sep = ',')
pbp <- pbp %>%
mutate(home_binded = paste(h1, h2, h3, h4, h5, sep = "-"),
away_binded = paste(a1, a2, a3, a4, a5, sep = "-"))
pbp <- pbp %>%
mutate(game_id = as.character(game_id),
turnover = ifelse(str_detect(description, "Turnover"), 1, 0),
FTA = ifelse(str_detect(description, "Free Throw"), 1, 0),
Three = ifelse(str_detect(description, "3pt"), 1, 0),
Dunk = ifelse(str_detect(description, "Dunk"), 1, 0),
Layup = ifelse(str_detect(description, "Layup"), 1, 0),
Jumpshot = ifelse(str_detect(description, "Jump Shot"), 1, 0)
) %>%
group_by(game_id, off_slug_team) %>%
mutate(possession_counter = cumsum(possession)) %>%
ungroup() %>%
mutate(turnover_non_transition = ifelse((str_detect(description, "Out of Bounds") & str_detect(description, "Turnover")) |
str_detect(description, "Offensive Foul Turnover") |
str_detect(description, "Traveling") |
str_detect(description, "Double Dribble") |
str_detect(description, "Shot Clock Turnover") |
str_detect(description, "Illegal Assist Turnover") |
str_detect(description, "Illegal Screen Turnover") |
str_detect(description, "Offensive Charge") |
str_detect(description, "8 Second Violation") |
str_detect(description, "5 Second Violation"), 1, 0 #|
#str_detect(description, "Foul: Offensive"), 1, 0)
))
pbp <- pbp %>%
select(game_id, period, team_home, team_away,
slug_team, off_slug_team, player1, desc_value,
possession, possession_counter, description, shot_pts,
hs, vs, turnover, turnover_non_transition,
secs_passed_game, secs_played, locX, locY)
This PBP dataset has a “description” column that indicates what occurred on that play. Within the first mutate function, I created indicators for different play/shot types based on the description column. Additionally, in a previous mini-project, I compared live vs. dead ball turnovers, so here, I created another indicator for dead-ball turnovers. Here are the categories I included:
These are all considered “dead ball” turnovers, because they result in a stoppage of play and the opposing defense doesn’t have the chance to push the ball in transition.
pbp <- pbp %>%
mutate(off_reb = ifelse(str_detect(description, "Rebound") & lag(off_slug_team) == slug_team, 1, 0),
def_reb = ifelse(str_detect(description, "Rebound") & lag(off_slug_team) != slug_team, 1, 0))
#Removing offensive rebounds off of first missed free throw
pbp <- pbp %>%
mutate(off_reb = ifelse(str_detect(description, "Rebound") & lag(off_slug_team) == slug_team & str_detect(lag(description), "Free Throw 1 of 2 Missed"), 0, off_reb))
#handling other edge case for offensive rebounds
pbp <- pbp %>%
mutate(off_reb = ifelse(str_detect(description, "Team Rebound") & lag(off_slug_team) == slug_team, 0, off_reb))
#new possession counter
pbp <- pbp %>%
filter(!is.na(off_slug_team)) %>%
group_by(game_id, period) %>%
mutate(possession2 = ifelse(row_number()==1|off_slug_team!=lag(off_slug_team), 1, 0)) %>%
ungroup() %>%
group_by(game_id, off_slug_team) %>%
mutate(possession_counter2 = cumsum(possession2)) %>%
ungroup()
#points scored on each possession
pbp <- pbp %>%
group_by(off_slug_team, game_id, possession_counter2) %>%
mutate(points_on_poss = sum(shot_pts)) %>%
ungroup()
#Further checks
pbp <- pbp %>%
group_by(game_id) %>%
mutate(leads_to_oreb = ifelse(lead(off_reb)==1, 1, 0),
same_coords_check = ifelse(off_reb==1 & locX == lag(locX) & locY == lag(locY), 1, 0)) %>%
ungroup()
#defining fgas
pbp <- pbp %>%
mutate(
fga = ifelse(
desc_value > 1 & str_detect(description, "Shot") | str_detect(description, "shot"), 1, 0
)
)
#consider creating one more convenient possession tracker in the future
#now, we want to know the time that ellapses on the first shot or shooting foul at the start of the next possession
pbp <- pbp %>%
group_by(game_id, period) %>%
mutate(shooting_foul_or_fga = ifelse(fga==1 | str_detect(description, "Foul: Shooting"), 1, 0)) %>%
ungroup()
#sum(pbp$shooting_foul_or_fga)
#Obtaining times before shot attempt/foul
pbp <- pbp %>%
group_by(game_id, period) %>%
mutate(time_before_shot_or_foul = ifelse(possession2 == 1, lag(secs_played) + lag(secs_played, n = 2), 0),#this gets the time ellapsed
prev_location_x = lag(locX),
prev_location_y = lag(locY)) %>% #Gets locations of previous shots
ungroup()
#distance formula
calc_distance <- function(x, y, x_baseline = 0, y_baseline = 0) {
sqrt((x-x_baseline)^2 + (y - y_baseline)^2)
}
pbp <- pbp %>%
mutate(locX_ft = locX/10,
locY_ft = locY/10) %>%
mutate(dist = calc_distance(locX_ft, locY_ft)) %>%
group_by(game_id, period) %>%
mutate(lagged_dist = lag(dist)) %>%
ungroup()
pbp <- pbp %>%
mutate(dist2 = calc_distance(locX, locY)/10)
pbp <- pbp %>%
# filter(dist <= 45) %>%
mutate(dist_bins = cut(dist,15))
#field goal attempts table
fgas <- pbp %>%
filter(fga == 1)
View(fgas %>%
mutate(y_bins = cut(locY, (max(locY)-min(locY))/10)) %>%
group_by(y_bins) %>%
summarize(count = n()) %>%
ungroup())
The chunks above create a slightly different possession counter than what’s included in the dataset; I wasn’t fully sure how possessions were indicated here, and it can be confusing in basketball. So, anytime that the offensive team didn’t match the previous one, I marked that as being the start of a possession. Then, I grouped by each game, team, and created a counter for possessions within a game by offensive team. I also was able to group by this new cumulative possession counter to count points on each possession which will be valuable later on.
Lastly, for each field goal attempt, I indicated whether that shot lead to an offensive rebound. Unfortunately, we don’t have the location of the offensive rebound, but only the location of the previous shot attempt.
bin_vector = c(50, 50)
# View(fgas %>%
# filter(shot_pts==0) %>%
# mutate(x_bins = cut(locX, (max(locX)-min(locX))/bin_vector[1]),
# y_bins = cut(locY, (max(locY)-min(locY))/bin_vector[2]))%>%
# group_by(x_bins, y_bins) %>%
# mutate(count = n())%>%
# ungroup())
pbp %>%
filter(leads_to_oreb==1 & locY >= -50) %>%
ggplot() +
geom_bin2d(aes(x = locX, y = locY)) +
labs(title = "Offensive Rebounding Counts by Zone") +
scale_fill_gradient(low = "darkblue",
high = "darkorange") +
labs(caption = "Thank you to Ramiro Bentes for\n providing amazing PBP data\nCoordinates are Location of Shot")
plotly::ggplotly(fgas %>%
filter(shot_pts==0 & dist <= 40 & locY > -30 & fga == 1) %>% #filtering for misses
# mutate(x_bins = cut(locX, (max(locX)-min(locX))/bin_vector[1]),
# y_bins = cut(locY, (max(locY)-min(locY))/bin_vector[2])) %>%
# group_by(x_bins, y_bins) %>%
# mutate(count = n()) %>%
# ungroup() %>%
ggplot(aes(x = locX, y = locY, z = leads_to_oreb)) +
stat_summary_2d(fun = mean, binwidth = bin_vector) +
#stat_bin_2d(binwidth = bin_vector) +
labs(title = "Offensive Rebounding Rate by Zone") +
scale_fill_gradient(low = "darkblue",
high = "darkorange") +
labs(caption = "Thank you to Ramiro Bentes for\n providing amazing PBP data"))
plotly::ggplotly(fgas %>%
filter(shot_pts==0 & dist <= 40 & fga == 1 & locY > -30) %>% #filtering for misses
# mutate(x_bins = cut(locX, (max(locX)-min(locX))/bin_vector[1]),
# y_bins = cut(locY, (max(locY)-min(locY))/bin_vector[2])) %>%
# group_by(x_bins, y_bins) %>%
# mutate(count = n()) %>%
# ungroup() %>%
ggplot(aes(x = locX, y = locY, z = leads_to_oreb)) +
# stat_summary_2d(fun = mean, binwidth = bin_vector) +
stat_bin_2d(binwidth = bin_vector) +
labs(title = "Counts of Missed Field Goal Attempts by Zone") +
scale_fill_gradient(low = "darkblue",
high = "darkorange") +
labs(caption = "Thank you to Ramiro Bentes for\n providing amazing PBP data"))
plotly::ggplotly(pbp %>%
filter(leads_to_oreb==1 & dist <= 30 & locY > -30 & fga ==1) %>%
ggplot(aes(x = locX, y = locY, z = points_on_poss)) +
stat_summary_2d(fun = mean, binwidth = bin_vector) +
labs(title = "Average PPP on Possessions\n after Offensive Rebounds Occur") +
scale_fill_gradient(low = "darkblue",
high = "darkorange") +
labs(caption = "Thank you to Ramiro Bentes for\n providing amazing PBP data"))
plotly::ggplotly(fgas %>%
filter(shot_pts==0) %>%
filter(dist <= 36) %>%
mutate(dist_bins = cut(dist,12)) %>%
group_by(dist_bins) %>%
summarize(oreb_rate = mean(leads_to_oreb),
count = n()) %>%
ungroup() %>%
ggplot(aes(x = dist_bins, y = oreb_rate)) +
geom_point(aes(size = count)) +
geom_line(group = 1) +
theme(axis.text.x = element_text(angle = 60)) +
labs(x = "Shot Distance Zone", y = "Offensive Rebounding Rate", caption = "Size = Number of Shot Attempts",
title = "Offensive Rebounding Rate by Previous Shot Distance"))
plotly::ggplotly(fgas %>%
filter(leads_to_oreb==1) %>%
filter(dist <= 36) %>%
mutate(dist_bins = cut(dist,12)) %>%
group_by(dist_bins) %>%
summarize(oreb_rate = mean(leads_to_oreb),
ppp = mean(points_on_poss),
count = n()) %>%
ungroup() %>%
ggplot(aes(x = dist_bins, y = ppp)) +
geom_point(aes(size = count)) +
geom_line(group = 1) +
theme(axis.text.x = element_text(angle = 60)) +
labs(x = "Shot Distance Zone", y = "PPP on Possessions with Offensive Rebounds", caption = "Size = Number of Shot Attempts", title = "PPP Following Offensive Rebound by Previous Shot Distance"))
#obtaining first plays in a possession:
first_plays <- pbp %>%
filter(possession2 == 1 & lag(def_reb) == 1 & lag(fga, 2)==1) %>%
filter(!(str_detect(description, "Substitution") | str_detect(description, "Stoppage") | str_detect(description, "Last possession of quarter"))) %>%
mutate(transition = ifelse(time_before_shot_or_foul <= 6, 1, 0))
#For now, including personal fouls; four things can happen after a defensive rebound: shot attempt, shooting foul, turnover, personal foul
#To get transition plays off of defensive rebounds, we will filter for when the time ellapsed is fewer than 8 seconds
transition_plays <- first_plays %>%
filter(time_before_shot_or_foul <= 6)
first_plays %>%
filter(fga==1) %>%
ggplot(aes(x = time_before_shot_or_foul)) +
geom_histogram() +
labs(x = "Time Taken to Shoot After \nObtaining Defensive Rebound")
plotly::ggplotly(first_plays %>%
filter(lagged_dist <= 30 & prev_location_y > -30) %>%
ggplot(aes(x = prev_location_x, y = prev_location_y, z = transition)) +
stat_summary_2d(fun = mean, binwidth = c(50, 50)) +
labs(title = "Average Transition Rate Following Defensive Rebound \nLocations are from Shot that Leads to Defensive Rebound") +
scale_fill_gradient(low = "darkblue",
high = "darkorange") +
labs(caption = "Thank you to Ramiro Bentes for\n providing amazing PBP data"))
plotly::ggplotly(first_plays %>%
filter(transition==1 & lagged_dist <= 30 & prev_location_y > -30)%>%
ggplot(aes(x = prev_location_x, y = prev_location_y, z = points_on_poss)) +
stat_summary_2d(fun = mean, binwidth = c(50, 50)) +
labs(title = "Average PPP on Defensive Rebound \nLocations are from Shot that Leads to Defensive Rebound") +
scale_fill_gradient(low = "darkblue",
high = "darkorange") +
labs(caption = "Thank you to Ramiro Bentes for\n providing amazing PBP data"))
plotly::ggplotly(first_plays %>%
filter(lagged_dist <= 36) %>%
mutate(dist_bins = cut(lagged_dist,12)) %>%
group_by(dist_bins) %>%
summarize(transition_rate = mean(transition),
count = n()) %>%
ungroup() %>%
ggplot(aes(x = dist_bins, y = transition_rate)) +
geom_point(aes(size = count)) +
geom_line(group = 1) +
theme(axis.text.x = element_text(angle = 60)) +
labs(x = "Shot Distance Zone", y = "Transition Rate", caption = "Number of Shot Attempts",
title = "Transition Rates by Previous Shot Distance"))
plotly::ggplotly(first_plays %>%
filter(transition==1 & lagged_dist <= 36 & prev_location_y > -30) %>%
mutate(dist_bins = cut(lagged_dist,12)) %>%
group_by(dist_bins) %>%
summarize(ppp = mean(points_on_poss),
count = n()) %>%
ungroup() %>%
ggplot(aes(x = dist_bins, y = ppp)) +
geom_point(aes(size = count)) +
geom_line(group = 1) +
theme(axis.text.x = element_text(angle = 60)) +
labs(x = "Shot Distance Zone (Previous Shot Attempt)", y = "PPP on Possessions with Transition Opportunities", caption = "Number of Shot Attempts", title = "PPP on Transition Rates Binned by Previous Location"))
transition_probs <- first_plays %>%
filter(lagged_dist <= 30) %>%
mutate(dist_bins = cut(lagged_dist,10)) %>%
group_by(dist_bins) %>%
summarize(rate = mean(transition),
count = n()) %>%
ungroup() %>%
mutate(class = "Transition")
transition_efficiency <- first_plays %>%
filter(transition==1 & lagged_dist <= 30 & prev_location_y > -30) %>%
mutate(dist_bins = cut(lagged_dist,10)) %>%
group_by(dist_bins) %>%
summarize(ppp = mean(points_on_poss)) %>%
ungroup() %>%
select(-dist_bins)
transition_table <- cbind(transition_probs, transition_efficiency)
oreb_probs <- fgas %>%
filter(shot_pts==0) %>%
filter(dist <= 30) %>%
mutate(dist_bins = cut(dist,10)) %>%
group_by(dist_bins) %>%
summarize(rate = mean(leads_to_oreb),
count = n()) %>%
ungroup() %>%
mutate(class = "OREB")
oreb_efficiency <- fgas %>%
filter(leads_to_oreb==1) %>%
filter(dist <= 30) %>%
mutate(dist_bins = cut(dist,10)) %>%
group_by(dist_bins) %>%
summarize(ppp = mean(points_on_poss)) %>%
ungroup() %>%
select(-dist_bins)
oreb_table <- cbind(oreb_probs, oreb_efficiency)
final_table <- rbind(transition_table, oreb_table) %>%
mutate(weighted_probs = rate*ppp)
plotly::ggplotly(
final_table %>%
ggplot(aes(x = dist_bins, y = weighted_probs, color = class, group = class)) +
geom_line() +
labs(x = "Distance Bins on PREVIOUS Shot", y = "Weighted Expectations")
)
Another topic that I relates to transition opportunities is a player’s live/dead ball turnover rate. In the NBA, turnovers are not all equal. Dead ball ones, such as offensive charges, passes that go out of bounds, or travels, don’t allow the opposing defense to gain a transition opportunity; instead, they have to take the ball out of bounds and start a brand new possession.
When evaluating players, we don’t break their turnover rates into live and dead ball categories. I believe that given their consistency from year-to-year, and how much more detrimental live-ball ones typically are, we should.
turnovers_2021 <- read.csv("turnovers_2021.csv")
turnovers_2022 <- read.csv("turnovers_2022.csv")
#YEAR-TO-YEAR CORRELATIONS
live_and_dead_2021 <- turnovers_2021 %>%
select(lagged_to_player, lagged_turnover_col, proportion, ppp, total_TOs) %>%
rename(Player = lagged_to_player,
Turnover_Type = lagged_turnover_col,
Proportion_2021 = proportion,
ppp_2021 = ppp,
Total_2021 = total_TOs)
live_and_dead_2022 <- turnovers_2022 %>%
select(lagged_to_player, lagged_turnover_col, proportion, ppp, total_TOs) %>%
rename(Player = lagged_to_player,
Turnover_Type = lagged_turnover_col,
Proportion_2022 = proportion,
ppp_2022 = ppp,
Total_2022 = total_TOs)
turnover_table <- live_and_dead_2021 %>% #only gets players who played both years
left_join(live_and_dead_2022, by = c('Player', 'Turnover_Type'))
head(turnover_table)
## Player Turnover_Type Proportion_2021 ppp_2021 Total_2021
## 1 Aaron Gordon Dead_Ball 0.3298969 0.8750000 97
## 2 Aaron Gordon Live_Ball 0.6701031 1.3230769 97
## 3 Aaron Holiday Dead_Ball 0.3030303 1.1000000 66
## 4 Aaron Holiday Live_Ball 0.6969697 1.1304348 66
## 5 Aaron Nesmith Dead_Ball 0.5217391 0.8333333 23
## 6 Aaron Nesmith Live_Ball 0.4782609 1.3636364 23
## Proportion_2022 ppp_2022 Total_2022
## 1 0.4436090 1.118644 133
## 2 0.5563910 1.391892 133
## 3 0.3582090 1.166667 67
## 4 0.6417910 1.372093 67
## 5 0.4516129 1.071429 31
## 6 0.5483871 1.235294 31
turnover_table %>%
filter(Total_2021 >= 40 & Total_2022 >= 40 & Player != "Team Turnover" & Turnover_Type == "Live_Ball") %>%
ggplot(aes(x = Proportion_2021, y = Proportion_2022, label = Player)) +
geom_point() +
# geom_text() +
labs(x = "Proportion of Turnovers that are Live Ball (2020-2021)",
y = "Proportion of Turnovers that are Live Ball (2021-2022)",
caption = "Among players with at least 40 turnovers;\n correlation = 0.62")
turnover_table %>%
filter(Total_2021 >= 40 & Total_2022 >= 40 & Player != "Team Turnover") %>%
ggplot(aes(x = ppp_2022, fill = Turnover_Type)) +
geom_density(alpha=0.25) +
labs(x = "Points on Possession by Opposing Team Following Turnover",
caption = "Among players with at least 40 turnovers")
View(pbp %>% filter(game_id==22100001))