Abstract

In the modern NBA, floor spacing is a major key to the success of an offensive system. We no longer see offenses that “pack” all five players within the three-point line; today, offensive systems utilize the three-point line to create more spacing to produce easier shots. Floor spacing can be manufactured, but it is heavily dependent on the design of an offensive system. Consequently, shot selection carries equal importance on the success of an offensive system. As Brian Skinner (2012) mentioned in his article The Problem of Shot Selection in Basketball, “the purpose of an offensive set is to generate a high-quality shot opportunity.” Thus, it is important for teams to be aware of a player’s ability to make shots and their shot selection. This study will examine the probability of an NBA player making a shot and what factors impact shot success. I develop a predictive shot model using logistic regression to predict whether a shot will be successful using five concepts:

  1. the zone of where the shot was taken (location),
  2. the distance between the shooter and the nearest defender,
  3. the quarter in which the shot was taken,
  4. touch time, and
  5. whether the shot was off the dribble.

Shot data from the 2021-2022 NBA season was used to train and test the model. Results showed that zone location and the nearest defender distance were the most important factors to shot success. Shot success probabilities were the highest in the restricted area, mid-range shots right outside the paint, and corner threes. This model could be used to help teams with game planning, player development, and roster construction.

2021-2022 NBA Shot Data

df <- read_excel("C:\\Users\\danni\\OneDrive\\Documents\\cap_shots (1).xlsx")

nba <- df %>%
  filter(zoneBasic != 'Backcourt')%>%
  filter(period != 5 & period != 6 &period != 7) %>%
  mutate(off_dribble = recode(dribble_range, 
                              "0 Dribbles" = "0", 
                              "1 Dribble" = "1",
                              "2 Dribbles" = "1",
                              "3-6 Dribbles" = "1",
                              "7+ Dribbles" = "1")) %>%
  mutate(across(c(close_def_dist,off_dribble,touch_time,shot_clock,zoneBasic,period),
                factor))

nba$zoneBasic <- relevel(nba$zoneBasic, ref='Restricted Area')

Testing For Association (Cramer’s V)

## Creating contingency table 


tab<- table(nba$zoneBasic, nba$close_def_dist, nba$touch_time, nba$period, dnn=c('Zone','Closest Defender Distance','Touch Time','Period'))

#converting from table to data frame that features row and column names like a matrix
tab_to_df <- function(x){
  mat <- as.matrix.data.frame(x)
  df <- data.frame(mat)
  rownames(df) <- c('Restricted Area','Above the Break 3','In The Paint (Non-RA)','Left Corner 3','Mid-Range','Right Corner 3')
  colnames(df) <- c('Very Tight (0-2ft)','Tight (2-4ft)','Open (4-6ft)','Wide Open (6ft+)')
  
  return(df)
}



#gt function for stratified tables in presentation format
gt_contin <- function(tbl,tbl.num,t_time,period){
  
  gt.tab <- tbl %>%
  gt::gt(rownames_to_stub = TRUE) %>%
  tab_header(
    title = glue('Stratified Table #{tbl.num}'),
    subtitle = glue('Touch Time < {t_time}, Period = {period}')
  ) %>%
  tab_spanner(
    label = 'Closest Defender Distance',
    columns = c('Very Tight (0-2ft)','Tight (2-4ft)','Open (4-6ft)','Wide Open (6ft+)')) %>%
  tab_stubhead(
    label = 'Zone'
  ) %>%
  gtExtras::gt_theme_nytimes()
  
}

# Placing stratified tables on one plot
tab1 <- tab_to_df(tab[,,1,1])
gt1 <- gt_contin(tab1,1,'2 seconds',1)
#gt1 %>%
 # gtsave("p1.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')

tab2 <- tab_to_df(tab[,,2,1])
gt2 <- gt_contin(tab2,2,'2-6 seconds',1)
#gt2 %>%
 # gtsave("p2.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
  
tab3 <- tab_to_df(tab[,,3,1])
gt3 <- gt_contin(tab3,3,'6ft+ seconds',1)
#gt3 %>%
 # gtsave("p3.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')


tab4 <- tab_to_df(tab[,,1,2])
gt4 <- gt_contin(tab4,4,'2 seconds',2)
#gt4 %>%
 # gtsave("p4.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')

tab5 <- tab_to_df(tab[,,2,2])
gt5 <- gt_contin(tab5,5,'2-6 seconds',2)
#gt5 %>%
 # gtsave("p5.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
  
tab6 <- tab_to_df(tab[,,3,2])
gt6 <- gt_contin(tab6,6,'6ft+ seconds',2)
#gt6 %>%
 # gtsave("p6.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')


tab7 <- tab_to_df(tab[,,1,3])
gt7 <- gt_contin(tab7,7,'2 seconds',3)
#gt7 %>%
 # gtsave("p7.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')

tab8 <- tab_to_df(tab[,,2,3])
gt8 <- gt_contin(tab7,8,'2-6 seconds',3)
#gt8 %>%
 # gtsave("p8.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
  
tab9 <- tab_to_df(tab[,,3,3])
gt9 <- gt_contin(tab7,9,'6ft+ seconds',3)
#gt9 %>%
  #gtsave("p9.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')


tab10 <- tab_to_df(tab[,,1,4])
gt10 <- gt_contin(tab10,10,'2 seconds',4)
#gt10 %>%
  #gtsave("p10.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')

tab11 <- tab_to_df(tab[,,2,4])
gt11 <- gt_contin(tab11,11,'2-6 seconds',4)
#gt11 %>%
  #gtsave("p11.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
  
tab12 <- tab_to_df(tab[,,3,4])
gt12 <- gt_contin(tab12,12,'6ft+ seconds',4)
#gt12 %>%
  #gtsave("p12.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')

p1 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p1.png", scale = 0.8)
p2 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p2.png", scale = 0.8)
p3 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p3.png", scale = 0.8)
p4 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p4.png", scale = 0.8)
p5 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p5.png", scale =0.8)
p6 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p6.png", scale = 0.8)
p7 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p7.png", scale = 0.8)
p8 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p8.png", scale = 0.8)
p9 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p9.png", scale = 0.8)
p10 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p10.png", scale =0.8)
p11 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p11.png", scale = 0.8)
p12 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\p12.png", scale = 0.8)


pg1 <- plot_grid(
  p1,p2,p3,p4,
   labels= c(1:4),
  align = 'hv')

pg2 <- plot_grid(
  p5,p6,p7,p8,
   labels= c(5:8),
  align = 'hv')

pg3 <-plot_grid(
  p9,p10,p11,p12,
   labels= c(9:12),
  align = 'hv')


######## Cramer's V Table ######

#getting Cramer's V for each stratified table
stats <-assocstats(tab) 

v<-c()
for (i in stats) {
  v <-c(v,round(i[[5]],3))
}

tab.order <- c(1:12)
c.df <- data.frame(tab.order, v )



t <- c.df %>%
  gt() %>%
  cols_label(
    tab.order = "Table #", v = "Cramer's V"
  ) %>%
  gtExtras::gt_color_rows(columns = v, palette = "ggsci::blue_material" ) %>%
  gt::tab_header(title = paste("Cramer's V for Each Contingency Table")) %>%
  tab_options(
    table.font.size = px(20),
    data_row.padding = px(1),
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_body(
      columns = c(1)
    )
  ) %>%
  gtExtras::gt_theme_nytimes() %>%
  gtExtras::fmt_symbol_first(c(v), decimals =3)

pg1

pg2

pg3

t
Cramer's V for Each Contingency Table
Table # Cramer's V
1 0.188
2 0.107
3 0.099
4 0.211
5 0.128
6 0.136
7 0.193
8 0.110
9 0.096
10 0.209
11 0.122
12 0.120
#t %>%
  #gtsave("C:\\Users\\danni\\OneDrive\\Pictures\\cramers.png", expand = 20)

‘Off Dribble’ is not included in the correlations since–as we’ll see in the next section–the ‘Off Dribble’ predictor is insignificant to our model, therefore, it can be removed from the model. Only factors that were significant to the final model are included in the correlations test.

Shot Prediction Model (Logistic Regression)

set.seed(1234)
split = initial_split(data=nba,prop=0.75)
Train = training(split)
Test = testing(split)



nba.m<- glm(isShotMade ~ zoneBasic + close_def_dist + touch_time + period + off_dribble, data=Train, family = 'binomial') 
summary(nba.m)
## 
## Call:
## glm(formula = isShotMade ~ zoneBasic + close_def_dist + touch_time + 
##     period + off_dribble, family = "binomial", data = Train)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.511  -1.020  -0.910   1.278   1.584  
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        4.785e-01  2.313e-02  20.689  < 2e-16 ***
## zoneBasicAbove the Break 3        -1.278e+00  1.448e-02 -88.259  < 2e-16 ***
## zoneBasicIn The Paint (Non-RA)    -9.119e-01  1.574e-02 -57.918  < 2e-16 ***
## zoneBasicLeft Corner 3            -1.112e+00  2.564e-02 -43.377  < 2e-16 ***
## zoneBasicMid-Range                -1.022e+00  1.794e-02 -56.983  < 2e-16 ***
## zoneBasicRight Corner 3           -1.134e+00  2.685e-02 -42.225  < 2e-16 ***
## close_def_dist2-4 Feet - Tight     2.136e-01  2.084e-02  10.250  < 2e-16 ***
## close_def_dist4-6 Feet - Open      2.774e-01  2.168e-02  12.798  < 2e-16 ***
## close_def_dist6+ Feet - Wide Open  1.970e-01  2.310e-02   8.529  < 2e-16 ***
## touch_timeTouch 2-6 Seconds       -5.303e-02  1.896e-02  -2.796  0.00517 ** 
## touch_timeTouch 6+ Seconds        -2.606e-03  2.216e-02  -0.118  0.90640    
## period2                           -2.532e-02  1.482e-02  -1.708  0.08759 .  
## period3                           -1.501e-02  1.489e-02  -1.008  0.31354    
## period4                           -6.539e-02  1.507e-02  -4.340 1.43e-05 ***
## off_dribble1                      -6.987e-05  1.879e-02  -0.004  0.99703    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 210131  on 152112  degrees of freedom
## Residual deviance: 200343  on 152098  degrees of freedom
## AIC: 200373
## 
## Number of Fisher Scoring iterations: 4

All levels for the Zone and Closest Defender Distance categories are significant. However, for both Touch Time and Period, only one level is significant and Off Dribble is insignificant as well. We can make a decision to include these variables by building models without those categories and then checking a likelihood ratio test.

#without touch time
nba.m2 <-glm(isShotMade ~ zoneBasic + close_def_dist + period + off_dribble, data=Train, family = 'binomial')
summary(nba.m2)
## 
## Call:
## glm(formula = isShotMade ~ zoneBasic + close_def_dist + period + 
##     off_dribble, family = "binomial", data = Train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5094  -1.0218  -0.9101   1.2801   1.5736  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        0.47528    0.02311  20.566  < 2e-16 ***
## zoneBasicAbove the Break 3        -1.27720    0.01447 -88.254  < 2e-16 ***
## zoneBasicIn The Paint (Non-RA)    -0.91197    0.01573 -57.971  < 2e-16 ***
## zoneBasicLeft Corner 3            -1.11187    0.02564 -43.363  < 2e-16 ***
## zoneBasicMid-Range                -1.02221    0.01791 -57.059  < 2e-16 ***
## zoneBasicRight Corner 3           -1.13315    0.02684 -42.214  < 2e-16 ***
## close_def_dist2-4 Feet - Tight     0.21313    0.02084  10.227  < 2e-16 ***
## close_def_dist4-6 Feet - Open      0.27816    0.02167  12.836  < 2e-16 ***
## close_def_dist6+ Feet - Wide Open  0.19899    0.02309   8.618  < 2e-16 ***
## period2                           -0.02483    0.01482  -1.675   0.0938 .  
## period3                           -0.01465    0.01489  -0.983   0.3254    
## period4                           -0.06454    0.01506  -4.285 1.83e-05 ***
## off_dribble1                      -0.02925    0.01155  -2.533   0.0113 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 210131  on 152112  degrees of freedom
## Residual deviance: 200356  on 152100  degrees of freedom
## AIC: 200382
## 
## Number of Fisher Scoring iterations: 4
#without period
nba.m3 <-glm(isShotMade ~ zoneBasic + close_def_dist + touch_time + off_dribble, data=Train, family = 'binomial') 
summary(nba.m3)
## 
## Call:
## glm(formula = isShotMade ~ zoneBasic + close_def_dist + touch_time + 
##     off_dribble, family = "binomial", data = Train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4989  -1.0200  -0.9111   1.2830   1.5657  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        0.452597   0.021288  21.260   <2e-16 ***
## zoneBasicAbove the Break 3        -1.277820   0.014477 -88.264   <2e-16 ***
## zoneBasicIn The Paint (Non-RA)    -0.911078   0.015742 -57.874   <2e-16 ***
## zoneBasicLeft Corner 3            -1.112178   0.025641 -43.376   <2e-16 ***
## zoneBasicMid-Range                -1.020960   0.017937 -56.921   <2e-16 ***
## zoneBasicRight Corner 3           -1.133379   0.026844 -42.221   <2e-16 ***
## close_def_dist2-4 Feet - Tight     0.213715   0.020844  10.253   <2e-16 ***
## close_def_dist4-6 Feet - Open      0.277435   0.021675  12.800   <2e-16 ***
## close_def_dist6+ Feet - Wide Open  0.197349   0.023100   8.543   <2e-16 ***
## touch_timeTouch 2-6 Seconds       -0.052988   0.018961  -2.795   0.0052 ** 
## touch_timeTouch 6+ Seconds        -0.004185   0.022156  -0.189   0.8502    
## off_dribble1                      -0.000115   0.018787  -0.006   0.9951    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 210131  on 152112  degrees of freedom
## Residual deviance: 200363  on 152101  degrees of freedom
## AIC: 200387
## 
## Number of Fisher Scoring iterations: 4
#without off_dribble
nba.m4<-glm(isShotMade ~ zoneBasic + close_def_dist + touch_time + period, data=Train, family = 'binomial') 
summary(nba.m4)
## 
## Call:
## glm(formula = isShotMade ~ zoneBasic + close_def_dist + touch_time + 
##     period, family = "binomial", data = Train)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.511  -1.020  -0.910   1.278   1.584  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        0.478525   0.022826  20.964  < 2e-16 ***
## zoneBasicAbove the Break 3        -1.277857   0.014477 -88.267  < 2e-16 ***
## zoneBasicIn The Paint (Non-RA)    -0.911915   0.015734 -57.959  < 2e-16 ***
## zoneBasicLeft Corner 3            -1.112284   0.025639 -43.382  < 2e-16 ***
## zoneBasicMid-Range                -1.022460   0.017929 -57.029  < 2e-16 ***
## zoneBasicRight Corner 3           -1.133558   0.026843 -42.229  < 2e-16 ***
## close_def_dist2-4 Feet - Tight     0.213639   0.020843  10.250  < 2e-16 ***
## close_def_dist4-6 Feet - Open      0.277403   0.021656  12.810  < 2e-16 ***
## close_def_dist6+ Feet - Wide Open  0.197034   0.022985   8.572  < 2e-16 ***
## touch_timeTouch 2-6 Seconds       -0.053082   0.012433  -4.269 1.96e-05 ***
## touch_timeTouch 6+ Seconds        -0.002662   0.016204  -0.164   0.8695    
## period2                           -0.025320   0.014822  -1.708   0.0876 .  
## period3                           -0.015009   0.014893  -1.008   0.3135    
## period4                           -0.065391   0.015067  -4.340 1.43e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 210131  on 152112  degrees of freedom
## Residual deviance: 200343  on 152099  degrees of freedom
## AIC: 200371
## 
## Number of Fisher Scoring iterations: 4
#2nd model anova
m2 <- anova(nba.m,nba.m2, test='LRT')#should be included since p-value less than 0.05

#3rd model anova
m3 <- anova(nba.m,nba.m3, test='LRT')#should be included since p-value less than 0.05

#4th model anova
m4 <- anova(nba.m,nba.m4, test='LRT') #should NOT be included since p-value greater than 0.05

anova2 <- m2 %>%
  gt() %>%
  tab_header(
    title = 'ANOVA for Model Without Touch Time'
  ) %>%
  gtExtras::gt_highlight_cols(`Pr(>Chi)`, fill='green', alpha = 0.5) %>%
  gtExtras::gt_theme_nytimes()
anova2 %>%
  gtsave("anova2.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')
  


anova3 <- m3 %>%
  gt() %>%
  tab_header(
    title = 'ANOVA for Model Without Period'
  ) %>%
  gtExtras::gt_highlight_cols(`Pr(>Chi)`, fill='green', alpha = 0.5) %>%
  gtExtras::gt_theme_nytimes()

anova3 %>%
  gtsave("anova3.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')

anova4 <- m4 %>%
  gt() %>%
  tab_header(
    title = 'ANOVA for Model Without Off Dribble'
  ) %>%
  gtExtras::gt_highlight_cols(`Pr(>Chi)`, fill='red', alpha = 0.5) %>%
  gtExtras::gt_theme_nytimes()

anova4 %>%
  gtsave("anova4.png", path = 'C:\\Users\\danni\\OneDrive\\Pictures')

anova.p2 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\anova2.png", scale = 0.8)

anova.p3 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\anova3.png", scale = 0.8)

anova.p4 <- ggdraw() + draw_image("C:\\Users\\danni\\OneDrive\\Pictures\\anova4.png", scale = 0.8)

an.pg <- plot_grid(
  anova.p2,anova.p3,anova.p4,
   labels= c(1:4),
  align = 'hv')

an.pg

tbl_reg_nba <- nba.m4 %>%
  tbl_regression() %>%
  as_gt() %>%
  gt::tab_header("Summary Table of NBA Shot Model Results ") %>% 
  tab_options(
    table.font.size = px(20),
    data_row.padding = px(1),
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_body(
      columns = c(1)
    )
  ) %>%
  gtExtras::gt_theme_nytimes() 
  
tbl_reg_nba
Summary Table of NBA Shot Model Results
Characteristic log(OR)1 95% CI1 p-value
zoneBasic
    Restricted Area
    Above the Break 3 -1.3 -1.3, -1.2 <0.001
    In The Paint (Non-RA) -0.91 -0.94, -0.88 <0.001
    Left Corner 3 -1.1 -1.2, -1.1 <0.001
    Mid-Range -1.0 -1.1, -0.99 <0.001
    Right Corner 3 -1.1 -1.2, -1.1 <0.001
close_def_dist
    0-2 Feet - Very Tight
    2-4 Feet - Tight 0.21 0.17, 0.25 <0.001
    4-6 Feet - Open 0.28 0.23, 0.32 <0.001
    6+ Feet - Wide Open 0.20 0.15, 0.24 <0.001
touch_time
    Touch < 2 Seconds
    Touch 2-6 Seconds -0.05 -0.08, -0.03 <0.001
    Touch 6+ Seconds 0.00 -0.03, 0.03 0.9
period
    1
    2 -0.03 -0.05, 0.00 0.088
    3 -0.02 -0.04, 0.01 0.3
    4 -0.07 -0.09, -0.04 <0.001
1 OR = Odds Ratio, CI = Confidence Interval
#tbl_reg_nba %>%
  #gtsave("C:\\Users\\danni\\OneDrive\\Pictures\\NBA log   results2.png", expand = 20)

Confusion Matrix and AUC Curve

predict_reg3 <- predict(nba.m4, 
                        Test, type = "response")

Test$prob <- c(predict_reg3) #adding probabilities to dataframe

predict_reg4 <- ifelse(predict_reg3 >0.5, 1, 0) ##changing probabilities

#confusion matrix
table(Test$isShotMade, predict_reg4) 
##        predict_reg4
##             0     1
##   FALSE 22267  5105
##   TRUE  13742  9591
#prediction accuracy
missing_classerr <- mean(predict_reg4 != Test$isShotMade)
print(paste('Accuracy =', 1 - missing_classerr)) 
## [1] "Accuracy = 0.628300956513164"
ROCPred <- prediction(predict_reg4, Test$isShotMade) 
ROCPer <- performance(ROCPred, measure = "tpr", 
                      x.measure = "fpr")
#area under curve
auc <- performance(ROCPred, measure = "auc") 
auc <- auc@y.values[[1]]
auc
## [1] 0.6122721
#Plotting Curve
plot(ROCPer, colorize = TRUE, 
     print.cutoffs.at = seq(0.1, by = 0.1), 
     main = "ROC CURVE")
abline(a = 0, b = 1)
   
auc <- round(auc, 4)
legend(.6, .4, auc, title = "AUC", cex = 1)

#Variable Importance

##variable importance



vi(nba.m4) #variable importance table
## # A tibble: 13 × 3
##    Variable                          Importance Sign 
##    <chr>                                  <dbl> <chr>
##  1 zoneBasicAbove the Break 3            88.3   NEG  
##  2 zoneBasicIn The Paint (Non-RA)        58.0   NEG  
##  3 zoneBasicMid-Range                    57.0   NEG  
##  4 zoneBasicLeft Corner 3                43.4   NEG  
##  5 zoneBasicRight Corner 3               42.2   NEG  
##  6 close_def_dist4-6 Feet - Open         12.8   POS  
##  7 close_def_dist2-4 Feet - Tight        10.2   POS  
##  8 close_def_dist6+ Feet - Wide Open      8.57  POS  
##  9 period4                                4.34  NEG  
## 10 touch_timeTouch 2-6 Seconds            4.27  NEG  
## 11 period2                                1.71  NEG  
## 12 period3                                1.01  NEG  
## 13 touch_timeTouch 6+ Seconds             0.164 NEG
vip(nba.m4) #variable importance plot (only includes varaibles that were significant to the model)

Function for Probability Plots

prob_charts <- function(df,prob_col,title){
  mid<- mean(prob_col)
  
  nba_plot <- ggplot(df, aes(x=locationX, y=locationY, color = prob)) + geom_point() +
    scale_colour_gradient2('Probability',low='blue', mid = 'white', high = 'red',
                           midpoint = mid,space='Lab') +
    theme(plot.title = element_text( hjust=.4,size = 20, family = "Comic Sans MS", face = "bold", vjust = 0, colour = "lightgrey"),
          plot.margin=unit(c(1,2,1,1),"cm"),
          plot.background = element_rect(fill = 'gray15', color = 'gray15'),
          panel.background = element_rect(fill = 'gray15', color = 'gray15'),
          panel.grid.major = element_line(colour= 'gray20'),
          panel.grid.minor = element_blank(),
          axis.line = element_line(colour="lightgrey"),
          axis.text = element_text(family = "Comic Sans MS", face = "bold",colour="lightgrey"),
          axis.title = element_text(family = "Comic Sans MS", face = "bold",colour="lightgrey"),
          legend.background = element_blank(),
          legend.text = element_text(hjust = .70, size = 10, family = "Comic Sans MS", face = "bold", colour = "white"),
          legend.title = element_text(family = "Comic Sans MS", face = "bold",colour='lightgrey')
    ) +
    ggtitle(title)
  
  ggdraw(nba_plot) +
    draw_image('https://cdn.nba.com/manage/2021/07/NBA_75-690x588.jpg',
               x=.80,y=.73 ,height=.12, width=.12)
}

Probability Plot for All Shots

nba_plot <- prob_charts(df=Test, prob_col=Test$prob,title="2021-2022 NBA Shot Probabilities")

nba_plot

Probability Plot for 3 Pointers

##3 pointers##
nba_3 <- subset(Test, zoneBasic == "Above the Break 3" |
                  zoneBasic =='Right Corner 3' | 
                  zoneBasic =='Left Corner 3')


nba_plot_3 <- prob_charts(df=nba_3,prob_col= nba_3$prob,title="2021-2022 NBA 3 Point Shot Probabilities")

nba_plot_3

Probability Plot for Mid-Range Shots

nba_mid_range <- subset(Test, zoneBasic == "Mid-Range")

nba_plot_2 <- prob_charts(df=nba_mid_range,prob_col= nba_mid_range$prob,title="2021-2022 NBA Mid-Range Shot Probabilities")

nba_plot_2